From 5342c015ba47daa410128dfa5c79e2865e74d244 Mon Sep 17 00:00:00 2001 From: Yuanjian Li Date: Sat, 9 Sep 2023 01:24:10 +0000 Subject: [PATCH 001/521] Preparing development version 3.5.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- common/utils/pom.xml | 2 +- connector/avro/pom.xml | 2 +- connector/connect/client/jvm/pom.xml | 2 +- connector/connect/common/pom.xml | 2 +- connector/connect/server/pom.xml | 2 +- connector/docker-integration-tests/pom.xml | 2 +- connector/kafka-0-10-assembly/pom.xml | 2 +- connector/kafka-0-10-sql/pom.xml | 2 +- connector/kafka-0-10-token-provider/pom.xml | 2 +- connector/kafka-0-10/pom.xml | 2 +- connector/kinesis-asl-assembly/pom.xml | 2 +- connector/kinesis-asl/pom.xml | 2 +- connector/protobuf/pom.xml | 2 +- connector/spark-ganglia-lgpl/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/api/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 45 files changed, 47 insertions(+), 47 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 1c093a4a98046..66faa8031c45d 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.5.0 +Version: 3.5.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index a0aca22eab91d..45b68dd81cb9a 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index ce180f49ff128..1b1a8d0066f89 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 8da48076a43aa..54c10a05eed22 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 48e64d21a58b0..92bf5bc07854b 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 2bbacbe71a439..3003927e713c1 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index fca31591b1ef1..43982032a621d 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index a93e227655ea7..a54382c0f4d03 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 49f7e2d8c861e..bea8f1ba87c57 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index c200c06a42e69..588fa3950dc5f 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 63b411137ed7a..c24ebad632940 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 8c9d11f64eec8..67227ef38eb8f 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../../../pom.xml diff --git a/connector/connect/common/pom.xml b/connector/connect/common/pom.xml index c78c5445e5073..40de7414051bd 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index 10deea435d2bd..403255c543727 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../../pom.xml diff --git a/connector/docker-integration-tests/pom.xml 
b/connector/docker-integration-tests/pom.xml index 87df8a9ff5bea..1bd6b8e7e4883 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index e7d86b6fd7560..69908e8bb4fb9 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index 8f41efc15cacb..72b6d7038b7e5 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index b22b937cd821e..e76f5d59d5048 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index 825868ebd9581..d6a023392f87d 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 312b9c460777a..90a568d1c3d83 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index 134f9c22d7436..666f757996d7d 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 7b8b45704a5ef..8021b78141ac4 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index a5870edfc7c81..df3deb35a772b 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index c40f9905245cb..eb4a563f1f31f 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 19cadd69e61ba..afe015b2972da 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. 
-SPARK_VERSION: 3.5.0 -SPARK_VERSION_SHORT: 3.5.0 +SPARK_VERSION: 3.5.1-SNAPSHOT +SPARK_VERSION_SHORT: 3.5.1 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.5.0"] + 'facetFilters': ["version:3.5.1"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/examples/pom.xml b/examples/pom.xml index c95269dbc4bb0..e85bc9ea02f8b 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 30fe05957d569..19d58f8c8b148 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index f67475ac11bc0..2b19307862060 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index d3c52a713911a..18ad615675b6e 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 5ec981a7816be..0660631bb4a9d 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index fe7c3da9c4eb2..e5948ba6eeff2 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 93d696d494e84..8fc4b89a78cc2 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT pom Spark Project Parent POM https://spark.apache.org/ diff --git a/python/pyspark/version.py b/python/pyspark/version.py index daccb365340b7..64ef604affff2 100644 --- a/python/pyspark/version.py +++ b/python/pyspark/version.py @@ -16,4 +16,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__: str = "3.5.0" +__version__: str = "3.5.1.dev0" diff --git a/repl/pom.xml b/repl/pom.xml index 6214dc2e18555..875d0455ee778 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 11420fe7fa76c..39771c07ca549 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index c7e543137385b..b420eda3eac5e 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 1dda41e085178..4c2fc6ef28aa5 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index fc07d304a038f..4d5cec6aba5c0 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/sql/api/pom.xml b/sql/api/pom.xml index dd2ebad69a058..0763c5266a2df 100644 --- a/sql/api/pom.xml +++ b/sql/api/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index f9cd2dc677adf..d7fc835fddbcb 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 7313ee5c41340..c46c80ecff70b 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 7222d49ecb020..3659a0f846a5e 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index b04c7565f8a3b..f833b6c34fa94 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 55758d75ce54d..75a98c050b5e0 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index a63b2e1062dd8..1805ba06c5b49 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.0 + 3.5.1-SNAPSHOT ../pom.xml From 3c92ecf21e0a00ba96d7934a4e27ec5b8d339762 Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Mon, 11 Sep 2023 08:06:56 +0800 Subject: [PATCH 002/521] [SPARK-45109][SQL][CONNECT] Fix aes_decrypt and ln functions in Connect ### What changes were proposed in this pull request? Fix `aes_decrypt` and `ln` implementations in Spark Connect.
The previous `aes_decrypt` reference to `aes_encrypt` is clearly a bug. The `ln` reference to `log` is more like a cosmetic issue, but because the `ln` and `log` function implementations are different in Spark SQL, we should use the same implementation in Spark Connect too. ### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No, these Spark Connect functions haven't been released. ### How was this patch tested? Existing UTs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42863 from peter-toth/SPARK-45109-fix-eas_decrypt-and-ln. Authored-by: Peter Toth Signed-off-by: Ruifeng Zheng (cherry picked from commit 5e97c79ad82914fc77601ad33cc304c61de93e87) Signed-off-by: Ruifeng Zheng --- .../scala/org/apache/spark/sql/functions.scala | 10 +++++----- .../function_aes_decrypt.explain | 2 +- .../function_aes_decrypt_with_mode.explain | 2 +- ...nction_aes_decrypt_with_mode_padding.explain | 2 +- ...on_aes_decrypt_with_mode_padding_aad.explain | 2 +- .../explain-results/function_ln.explain | 2 +- .../queries/function_aes_decrypt.json | 2 +- .../queries/function_aes_decrypt.proto.bin | Bin 187 -> 187 bytes .../queries/function_aes_decrypt_with_mode.json | 2 +- .../function_aes_decrypt_with_mode.proto.bin | Bin 194 -> 194 bytes .../function_aes_decrypt_with_mode_padding.json | 2 +- ...tion_aes_decrypt_with_mode_padding.proto.bin | Bin 201 -> 201 bytes ...ction_aes_decrypt_with_mode_padding_aad.json | 2 +- ..._aes_decrypt_with_mode_padding_aad.proto.bin | Bin 208 -> 208 bytes .../query-tests/queries/function_ln.json | 2 +- .../query-tests/queries/function_ln.proto.bin | Bin 172 -> 171 bytes 16 files changed, 15 insertions(+), 15 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index fe992ae6740bf..16e787f825a6f 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -2624,7 +2624,7 @@ object functions { * @group math_funcs * @since 3.5.0 */ - def ln(e: Column): Column = log(e) + def ln(e: Column): Column = Column.fn("ln", e) /** * Computes the natural logarithm of the given value. @@ -3477,7 +3477,7 @@ object functions { mode: Column, padding: Column, aad: Column): Column = - Column.fn("aes_encrypt", input, key, mode, padding, aad) + Column.fn("aes_decrypt", input, key, mode, padding, aad) /** * Returns a decrypted value of `input`. @@ -3489,7 +3489,7 @@ * @since 3.5.0 */ def aes_decrypt(input: Column, key: Column, mode: Column, padding: Column): Column = - Column.fn("aes_encrypt", input, key, mode, padding) + Column.fn("aes_decrypt", input, key, mode, padding) /** * Returns a decrypted value of `input`. @@ -3501,7 +3501,7 @@ * @since 3.5.0 */ def aes_decrypt(input: Column, key: Column, mode: Column): Column = - Column.fn("aes_encrypt", input, key, mode) + Column.fn("aes_decrypt", input, key, mode) /** * Returns a decrypted value of `input`.
@@ -3513,7 +3513,7 @@ object functions { * @since 3.5.0 */ def aes_decrypt(input: Column, key: Column): Column = - Column.fn("aes_encrypt", input, key) + Column.fn("aes_decrypt", input, key) /** * This is a special version of `aes_decrypt` that performs the same operation, but returns a diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain index 44084a8e60fb0..31e03b79eb987 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, GCM, DEFAULT, , )#0] +Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), GCM, DEFAULT, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, g, GCM, DEFAULT, )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain index 29ccf0c1c833f..fc572e8fe7c67 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, DEFAULT, , )#0] +Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, DEFAULT, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, g, g, DEFAULT, )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain index 5591363426ab5..c6c693013dd0a 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, 
true, true) AS aes_encrypt(g, g, g, g, , )#0] +Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, g, g, g, )#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain index 0e8d4df71b38e..97bb528b84b3f 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_aes_decrypt_with_mode_padding_aad.explain @@ -1,2 +1,2 @@ -Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesEncrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary), cast( as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, BinaryType, true, true, true) AS aes_encrypt(g, g, g, g, g, )#0] +Project [staticinvoke(class org.apache.spark.sql.catalyst.expressions.ExpressionImplUtils, BinaryType, aesDecrypt, cast(g#0 as binary), cast(g#0 as binary), g#0, g#0, cast(g#0 as binary), BinaryType, BinaryType, StringType, StringType, BinaryType, true, true, true) AS aes_decrypt(g, g, g, g, g)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain index d3c3743b1ef40..66b782ac8170d 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_ln.explain @@ -1,2 +1,2 @@ -Project [LOG(E(), b#0) AS LOG(E(), b)#0] +Project [ln(b#0) AS ln(b)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json index 06469d4840547..4204a44b44ce0 100644 --- a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.json @@ -13,7 +13,7 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "aes_encrypt", + "functionName": "aes_decrypt", "arguments": [{ "unresolvedAttribute": { "unparsedIdentifier": "g" diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_aes_decrypt.proto.bin index c7a70b51707f321c5e0ca295920fd6a287693c1d..f635e1fc689b1127bebea3b7ad0930f73869006b 100644 GIT binary patch delta 28 ecmdnZxSMgpGSQUOc!6=kGNzQ&i7PY!A;|>= delta 12 Tcmcb>c!6=kGN#nLi7PY!A@T(e diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json index 1b2d0ed0b1447..ababbc52d088d 100644 --- 
a/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.json @@ -13,7 +13,7 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "log", + "functionName": "ln", "arguments": [{ "unresolvedAttribute": { "unparsedIdentifier": "b" diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_ln.proto.bin index 548fb480dd27e65e18dd064ce7d7bfeb5215e293..ecb87a1fc410205cf387950c7e60b407d310d50e 100644 GIT binary patch delta 29 kcmZ3(xSElTi%Eb{YRN>l>3qCW++0jKc|xp0%v_8~09V}wC;$Ke delta 30 lcmZ3@xQ3C9i%Eb{YUxC_>HK_BJY38<`RPKeLd;x@NdRB=1(g5* From ecf507fd976752eb466ccba4a7ed005c1542a22d Mon Sep 17 00:00:00 2001 From: Peter Toth Date: Mon, 11 Sep 2023 19:04:41 +0200 Subject: [PATCH 003/521] [SPARK-45109][SQL][CONNECT][FOLLOWUP] Fix log function in Connect ### What changes were proposed in this pull request? This is a follow-up PR to https://github.com/apache/spark/pull/42863: the 1-argument `log` function should also point to `ln`. ### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No, these Spark Connect functions haven't been released. ### How was this patch tested? Existing UTs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42869 from peter-toth/SPARK-45109-fix-log. Authored-by: Peter Toth Signed-off-by: Peter Toth (cherry picked from commit 6c3d9f5d89dfc974a5f799b73325aebf10f3cf16) Signed-off-by: Peter Toth --- .../scala/org/apache/spark/sql/functions.scala | 2 +- .../explain-results/function_log.explain | 2 +- .../query-tests/queries/function_log.json | 2 +- .../query-tests/queries/function_log.proto.bin | Bin 172 -> 171 bytes 4 files changed, 3 insertions(+), 3 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index 16e787f825a6f..8f55954a63f33 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -2632,7 +2632,7 @@ object functions { * @group math_funcs * @since 3.4.0 */ - def log(e: Column): Column = Column.fn("log", e) + def log(e: Column): Column = ln(e) /** * Computes the natural logarithm of the given column.
diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_log.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_log.explain index d3c3743b1ef40..66b782ac8170d 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_log.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_log.explain @@ -1,2 +1,2 @@ -Project [LOG(E(), b#0) AS LOG(E(), b)#0] +Project [ln(b#0) AS ln(b)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_log.json b/connector/connect/common/src/test/resources/query-tests/queries/function_log.json index 1b2d0ed0b1447..ababbc52d088d 100644 --- a/connector/connect/common/src/test/resources/query-tests/queries/function_log.json +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_log.json @@ -13,7 +13,7 @@ }, "expressions": [{ "unresolvedFunction": { - "functionName": "log", + "functionName": "ln", "arguments": [{ "unresolvedAttribute": { "unparsedIdentifier": "b" diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_log.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_log.proto.bin index 548fb480dd27e65e18dd064ce7d7bfeb5215e293..ecb87a1fc410205cf387950c7e60b407d310d50e 100644 GIT binary patch delta 29 kcmZ3(xSElTi%Eb{YRN>l>3qCW++0jKc|xp0%v_8~09V}wC;$Ke delta 30 lcmZ3@xQ3C9i%Eb{YUxC_>HK_BJY38<`RPKeLd;x@NdRB=1(g5* From 09b14f0968cebe0f2c5c9a369935f27d4ea228f6 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 12 Sep 2023 14:59:44 +0900 Subject: [PATCH 004/521] [SPARK-45124][CONNECT] Do not use local user ID for Local Relations ### What changes were proposed in this pull request? This PR removes the use of `userId` and `sessionId` in `CachedLocalRelation` messages and subsequently makes `SparkConnectPlanner` use the `userId`/`sessionId` of the active session rather than the user-provided information. ### Why are the changes needed? Allowing a fetch of a local relation using user-provided information is a potential security risk since this allows users to fetch arbitrary local relations. ### Does this PR introduce _any_ user-facing change? Virtually no. It will ignore the session id or user id that users set (but instead use internal ones that users cannot manipulate). ### How was this patch tested? Manually. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42880 from HyukjinKwon/no-local-user.
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 47d801e5e9ded3fb50d274a720ee7874e0b37cc3) Signed-off-by: Hyukjin Kwon --- .../org/apache/spark/sql/SparkSession.scala | 2 - .../protobuf/spark/connect/relations.proto | 10 +- .../connect/planner/SparkConnectPlanner.scala | 2 +- python/pyspark/sql/connect/plan.py | 3 - .../sql/connect/proto/relations_pb2.py | 160 +++++++++--------- .../sql/connect/proto/relations_pb2.pyi | 15 +- 6 files changed, 87 insertions(+), 105 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala index 7882ea6401354..7bd8fa59aea8f 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -134,8 +134,6 @@ class SparkSession private[sql] ( } else { val hash = client.cacheLocalRelation(arrowData, encoder.schema.json) builder.getCachedLocalRelationBuilder - .setUserId(client.userId) - .setSessionId(client.sessionId) .setHash(hash) } } else { diff --git a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto index 8001b3cbcfaa4..f7f1315ede0f8 100644 --- a/connector/connect/common/src/main/protobuf/spark/connect/relations.proto +++ b/connector/connect/common/src/main/protobuf/spark/connect/relations.proto @@ -400,11 +400,11 @@ message LocalRelation { // A local relation that has been cached already. message CachedLocalRelation { - // (Required) An identifier of the user which created the local relation - string userId = 1; - - // (Required) An identifier of the Spark SQL session in which the user created the local relation. - string sessionId = 2; + // `userId` and `sessionId` fields are deleted since the server must always use the active + // session/user rather than arbitrary values provided by the client. It is never valid to access + // a local relation from a different session/user. + reserved 1, 2; + reserved "userId", "sessionId"; // (Required) A sha-256 hash of the serialized local relation in proto, see LocalRelation. 
string hash = 3; diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 2abbacc5a9b7f..641dfc5dcd3c8 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -970,7 +970,7 @@ class SparkConnectPlanner(val sessionHolder: SessionHolder) extends Logging { private def transformCachedLocalRelation(rel: proto.CachedLocalRelation): LogicalPlan = { val blockManager = session.sparkContext.env.blockManager - val blockId = CacheId(rel.getUserId, rel.getSessionId, rel.getHash) + val blockId = CacheId(sessionHolder.userId, sessionHolder.sessionId, rel.getHash) val bytes = blockManager.getLocalBytes(blockId) bytes .map { blockData => diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py index 84fd013d0144a..196b1f119ba41 100644 --- a/python/pyspark/sql/connect/plan.py +++ b/python/pyspark/sql/connect/plan.py @@ -398,9 +398,6 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: plan = self._create_proto_relation() clr = plan.cached_local_relation - if session._user_id: - clr.userId = session._user_id - clr.sessionId = session._session_id clr.hash = self._hash return plan diff --git a/python/pyspark/sql/connect/proto/relations_pb2.py b/python/pyspark/sql/connect/proto/relations_pb2.py index 3a0a7ff71fd3b..3f7e57949373b 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.py +++ b/python/pyspark/sql/connect/proto/relations_pb2.py @@ -35,7 +35,7 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto"\xe1\x18\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12*\n\x05to_df\x18\x12 
\x01(\x0b\x32\x13.spark.connect.ToDFH\x00R\x04toDf\x12U\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00R\x12withColumnsRenamed\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x45\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00R\rmapPartitions\x12H\n\x0f\x63ollect_metrics\x18\x1d \x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00R\x0e\x63ollectMetrics\x12,\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00R\x05parse\x12\x36\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00R\x08groupMap\x12=\n\x0c\x63o_group_map\x18 \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00R\ncoGroupMap\x12\x45\n\x0ewith_watermark\x18! \x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00R\rwithWatermark\x12\x63\n\x1a\x61pply_in_pandas_with_state\x18" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00R\x16\x61pplyInPandasWithState\x12<\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00R\nhtmlString\x12X\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32".spark.connect.CachedLocalRelationH\x00R\x13\x63\x61\x63hedLocalRelation\x12[\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00R\x14\x63\x61\x63hedRemoteRelation\x12\x8e\x01\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R$commonInlineUserDefinedTableFunction\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\t\n\x07Unknown"[\n\x0eRelationCommon\x12\x1f\n\x0bsource_info\x18\x01 \x01(\tR\nsourceInfo\x12\x1c\n\x07plan_id\x18\x02 
\x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id"\xe7\x01\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query\x12\x30\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryR\x04\x61rgs\x12<\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x07posArgs\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01"\x97\x05\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a\xc0\x01\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x95\x02\n\nDataSource\x12\x1b\n\x06\x66ormat\x18\x01 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x12\x14\n\x05paths\x18\x04 \x03(\tR\x05paths\x12\x1e\n\npredicates\x18\x05 \x03(\tR\npredicates\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\x95\x05\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns\x12K\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 .spark.connect.Join.JoinDataTypeH\x00R\x0cjoinDataType\x88\x01\x01\x1a\\\n\x0cJoinDataType\x12$\n\x0eis_left_struct\x18\x01 \x01(\x08R\x0cisLeftStruct\x12&\n\x0fis_right_struct\x18\x02 \x01(\x08R\risRightStruct"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type"\xdf\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01\x12\x37\n\x15\x61llow_missing_columns\x18\x06 
\x01(\x08H\x02R\x13\x61llowMissingColumns\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xc6\x04\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 \x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x81\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 \x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"\x8d\x01\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x33\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07\x63olumns\x12!\n\x0c\x63olumn_names\x18\x03 \x03(\tR\x0b\x63olumnNames"\xf0\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x12.\n\x10within_watermark\x18\x04 \x01(\x08H\x01R\x0fwithinWatermark\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"_\n\x13\x43\x61\x63hedLocalRelation\x12\x16\n\x06userId\x18\x01 \x01(\tR\x06userId\x12\x1c\n\tsessionId\x18\x02 \x01(\tR\tsessionId\x12\x12\n\x04hash\x18\x03 \x01(\tR\x04hash"7\n\x14\x43\x61\x63hedRemoteRelation\x12\x1f\n\x0brelation_id\x18\x01 \x01(\tR\nrelationId"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 
\x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 \x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 \x01(\x08R\x08vertical"r\n\nHtmlString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 \x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 
\x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"X\n\x04ToDF\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\xef\x01\n\x12WithColumnsRenamed\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x65\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryR\x10renameColumnsMap\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"w\n\x0bWithColumns\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x07\x61liases"\x86\x01\n\rWithWatermark\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\nevent_time\x18\x02 \x01(\tR\teventTime\x12\'\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\tR\x0e\x64\x65layThreshold"\x84\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x39\n\nparameters\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\nparameters"\xc7\x02\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12:\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00R\x06values\x88\x01\x01\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName\x1a;\n\x06Values\x12\x31\n\x06values\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x06valuesB\t\n\x07_values"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 \x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitions"\xb5\x01\n\rMapPartitions\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12"\n\nis_barrier\x18\x03 \x01(\x08H\x00R\tisBarrier\x88\x01\x01\x42\r\n\x0b_is_barrier"\xfb\x04\n\x08GroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12J\n\x13sorting_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x12sortingExpressions\x12<\n\rinitial_input\x18\x05 \x01(\x0b\x32\x17.spark.connect.RelationR\x0cinitialInput\x12[\n\x1cinitial_grouping_expressions\x18\x06 
\x03(\x0b\x32\x19.spark.connect.ExpressionR\x1ainitialGroupingExpressions\x12;\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00R\x14isMapGroupsWithState\x88\x01\x01\x12$\n\x0boutput_mode\x18\x08 \x01(\tH\x01R\noutputMode\x88\x01\x01\x12&\n\x0ctimeout_conf\x18\t \x01(\tH\x02R\x0btimeoutConf\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_conf"\x8e\x04\n\nCoGroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12W\n\x1ainput_grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18inputGroupingExpressions\x12-\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05other\x12W\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18otherGroupingExpressions\x12\x42\n\x04\x66unc\x18\x05 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12U\n\x19input_sorting_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17inputSortingExpressions\x12U\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17otherSortingExpressions"\xe5\x02\n\x16\x41pplyInPandasWithState\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12#\n\routput_schema\x18\x04 \x01(\tR\x0coutputSchema\x12!\n\x0cstate_schema\x18\x05 \x01(\tR\x0bstateSchema\x12\x1f\n\x0boutput_mode\x18\x06 \x01(\tR\noutputMode\x12!\n\x0ctimeout_conf\x18\x07 \x01(\tR\x0btimeoutConf"\xf4\x01\n$CommonInlineUserDefinedTableFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12<\n\x0bpython_udtf\x18\x04 \x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00R\npythonUdtfB\n\n\x08\x66unction"\xb1\x01\n\nPythonUDTF\x12=\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\nreturnType\x88\x01\x01\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVerB\x0e\n\x0c_return_type"\x88\x01\n\x0e\x43ollectMetrics\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x33\n\x07metrics\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07metrics"\x84\x03\n\x05Parse\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x38\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormatR\x06\x66ormat\x12\x34\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x06schema\x88\x01\x01\x12;\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"X\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x42\t\n\x07_schemaB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' + 
b'\n\x1dspark/connect/relations.proto\x12\rspark.connect\x1a\x19google/protobuf/any.proto\x1a\x1fspark/connect/expressions.proto\x1a\x19spark/connect/types.proto\x1a\x1bspark/connect/catalog.proto"\xe1\x18\n\x08Relation\x12\x35\n\x06\x63ommon\x18\x01 \x01(\x0b\x32\x1d.spark.connect.RelationCommonR\x06\x63ommon\x12)\n\x04read\x18\x02 \x01(\x0b\x32\x13.spark.connect.ReadH\x00R\x04read\x12\x32\n\x07project\x18\x03 \x01(\x0b\x32\x16.spark.connect.ProjectH\x00R\x07project\x12/\n\x06\x66ilter\x18\x04 \x01(\x0b\x32\x15.spark.connect.FilterH\x00R\x06\x66ilter\x12)\n\x04join\x18\x05 \x01(\x0b\x32\x13.spark.connect.JoinH\x00R\x04join\x12\x34\n\x06set_op\x18\x06 \x01(\x0b\x32\x1b.spark.connect.SetOperationH\x00R\x05setOp\x12)\n\x04sort\x18\x07 \x01(\x0b\x32\x13.spark.connect.SortH\x00R\x04sort\x12,\n\x05limit\x18\x08 \x01(\x0b\x32\x14.spark.connect.LimitH\x00R\x05limit\x12\x38\n\taggregate\x18\t \x01(\x0b\x32\x18.spark.connect.AggregateH\x00R\taggregate\x12&\n\x03sql\x18\n \x01(\x0b\x32\x12.spark.connect.SQLH\x00R\x03sql\x12\x45\n\x0elocal_relation\x18\x0b \x01(\x0b\x32\x1c.spark.connect.LocalRelationH\x00R\rlocalRelation\x12/\n\x06sample\x18\x0c \x01(\x0b\x32\x15.spark.connect.SampleH\x00R\x06sample\x12/\n\x06offset\x18\r \x01(\x0b\x32\x15.spark.connect.OffsetH\x00R\x06offset\x12>\n\x0b\x64\x65\x64uplicate\x18\x0e \x01(\x0b\x32\x1a.spark.connect.DeduplicateH\x00R\x0b\x64\x65\x64uplicate\x12,\n\x05range\x18\x0f \x01(\x0b\x32\x14.spark.connect.RangeH\x00R\x05range\x12\x45\n\x0esubquery_alias\x18\x10 \x01(\x0b\x32\x1c.spark.connect.SubqueryAliasH\x00R\rsubqueryAlias\x12>\n\x0brepartition\x18\x11 \x01(\x0b\x32\x1a.spark.connect.RepartitionH\x00R\x0brepartition\x12*\n\x05to_df\x18\x12 \x01(\x0b\x32\x13.spark.connect.ToDFH\x00R\x04toDf\x12U\n\x14with_columns_renamed\x18\x13 \x01(\x0b\x32!.spark.connect.WithColumnsRenamedH\x00R\x12withColumnsRenamed\x12<\n\x0bshow_string\x18\x14 \x01(\x0b\x32\x19.spark.connect.ShowStringH\x00R\nshowString\x12)\n\x04\x64rop\x18\x15 \x01(\x0b\x32\x13.spark.connect.DropH\x00R\x04\x64rop\x12)\n\x04tail\x18\x16 \x01(\x0b\x32\x13.spark.connect.TailH\x00R\x04tail\x12?\n\x0cwith_columns\x18\x17 \x01(\x0b\x32\x1a.spark.connect.WithColumnsH\x00R\x0bwithColumns\x12)\n\x04hint\x18\x18 \x01(\x0b\x32\x13.spark.connect.HintH\x00R\x04hint\x12\x32\n\x07unpivot\x18\x19 \x01(\x0b\x32\x16.spark.connect.UnpivotH\x00R\x07unpivot\x12\x36\n\tto_schema\x18\x1a \x01(\x0b\x32\x17.spark.connect.ToSchemaH\x00R\x08toSchema\x12\x64\n\x19repartition_by_expression\x18\x1b \x01(\x0b\x32&.spark.connect.RepartitionByExpressionH\x00R\x17repartitionByExpression\x12\x45\n\x0emap_partitions\x18\x1c \x01(\x0b\x32\x1c.spark.connect.MapPartitionsH\x00R\rmapPartitions\x12H\n\x0f\x63ollect_metrics\x18\x1d \x01(\x0b\x32\x1d.spark.connect.CollectMetricsH\x00R\x0e\x63ollectMetrics\x12,\n\x05parse\x18\x1e \x01(\x0b\x32\x14.spark.connect.ParseH\x00R\x05parse\x12\x36\n\tgroup_map\x18\x1f \x01(\x0b\x32\x17.spark.connect.GroupMapH\x00R\x08groupMap\x12=\n\x0c\x63o_group_map\x18 \x01(\x0b\x32\x19.spark.connect.CoGroupMapH\x00R\ncoGroupMap\x12\x45\n\x0ewith_watermark\x18! 
\x01(\x0b\x32\x1c.spark.connect.WithWatermarkH\x00R\rwithWatermark\x12\x63\n\x1a\x61pply_in_pandas_with_state\x18" \x01(\x0b\x32%.spark.connect.ApplyInPandasWithStateH\x00R\x16\x61pplyInPandasWithState\x12<\n\x0bhtml_string\x18# \x01(\x0b\x32\x19.spark.connect.HtmlStringH\x00R\nhtmlString\x12X\n\x15\x63\x61\x63hed_local_relation\x18$ \x01(\x0b\x32".spark.connect.CachedLocalRelationH\x00R\x13\x63\x61\x63hedLocalRelation\x12[\n\x16\x63\x61\x63hed_remote_relation\x18% \x01(\x0b\x32#.spark.connect.CachedRemoteRelationH\x00R\x14\x63\x61\x63hedRemoteRelation\x12\x8e\x01\n)common_inline_user_defined_table_function\x18& \x01(\x0b\x32\x33.spark.connect.CommonInlineUserDefinedTableFunctionH\x00R$commonInlineUserDefinedTableFunction\x12\x30\n\x07\x66ill_na\x18Z \x01(\x0b\x32\x15.spark.connect.NAFillH\x00R\x06\x66illNa\x12\x30\n\x07\x64rop_na\x18[ \x01(\x0b\x32\x15.spark.connect.NADropH\x00R\x06\x64ropNa\x12\x34\n\x07replace\x18\\ \x01(\x0b\x32\x18.spark.connect.NAReplaceH\x00R\x07replace\x12\x36\n\x07summary\x18\x64 \x01(\x0b\x32\x1a.spark.connect.StatSummaryH\x00R\x07summary\x12\x39\n\x08\x63rosstab\x18\x65 \x01(\x0b\x32\x1b.spark.connect.StatCrosstabH\x00R\x08\x63rosstab\x12\x39\n\x08\x64\x65scribe\x18\x66 \x01(\x0b\x32\x1b.spark.connect.StatDescribeH\x00R\x08\x64\x65scribe\x12*\n\x03\x63ov\x18g \x01(\x0b\x32\x16.spark.connect.StatCovH\x00R\x03\x63ov\x12-\n\x04\x63orr\x18h \x01(\x0b\x32\x17.spark.connect.StatCorrH\x00R\x04\x63orr\x12L\n\x0f\x61pprox_quantile\x18i \x01(\x0b\x32!.spark.connect.StatApproxQuantileH\x00R\x0e\x61pproxQuantile\x12=\n\nfreq_items\x18j \x01(\x0b\x32\x1c.spark.connect.StatFreqItemsH\x00R\tfreqItems\x12:\n\tsample_by\x18k \x01(\x0b\x32\x1b.spark.connect.StatSampleByH\x00R\x08sampleBy\x12\x33\n\x07\x63\x61talog\x18\xc8\x01 \x01(\x0b\x32\x16.spark.connect.CatalogH\x00R\x07\x63\x61talog\x12\x35\n\textension\x18\xe6\x07 \x01(\x0b\x32\x14.google.protobuf.AnyH\x00R\textension\x12\x33\n\x07unknown\x18\xe7\x07 \x01(\x0b\x32\x16.spark.connect.UnknownH\x00R\x07unknownB\n\n\x08rel_type"\t\n\x07Unknown"[\n\x0eRelationCommon\x12\x1f\n\x0bsource_info\x18\x01 \x01(\tR\nsourceInfo\x12\x1c\n\x07plan_id\x18\x02 \x01(\x03H\x00R\x06planId\x88\x01\x01\x42\n\n\x08_plan_id"\xe7\x01\n\x03SQL\x12\x14\n\x05query\x18\x01 \x01(\tR\x05query\x12\x30\n\x04\x61rgs\x18\x02 \x03(\x0b\x32\x1c.spark.connect.SQL.ArgsEntryR\x04\x61rgs\x12<\n\x08pos_args\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x07posArgs\x1aZ\n\tArgsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x37\n\x05value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x05value:\x02\x38\x01"\x97\x05\n\x04Read\x12\x41\n\x0bnamed_table\x18\x01 \x01(\x0b\x32\x1e.spark.connect.Read.NamedTableH\x00R\nnamedTable\x12\x41\n\x0b\x64\x61ta_source\x18\x02 \x01(\x0b\x32\x1e.spark.connect.Read.DataSourceH\x00R\ndataSource\x12!\n\x0cis_streaming\x18\x03 \x01(\x08R\x0bisStreaming\x1a\xc0\x01\n\nNamedTable\x12/\n\x13unparsed_identifier\x18\x01 \x01(\tR\x12unparsedIdentifier\x12\x45\n\x07options\x18\x02 \x03(\x0b\x32+.spark.connect.Read.NamedTable.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x1a\x95\x02\n\nDataSource\x12\x1b\n\x06\x66ormat\x18\x01 \x01(\tH\x00R\x06\x66ormat\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x12\x45\n\x07options\x18\x03 \x03(\x0b\x32+.spark.connect.Read.DataSource.OptionsEntryR\x07options\x12\x14\n\x05paths\x18\x04 \x03(\tR\x05paths\x12\x1e\n\npredicates\x18\x05 
\x03(\tR\npredicates\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01\x42\t\n\x07_formatB\t\n\x07_schemaB\x0b\n\tread_type"u\n\x07Project\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12;\n\x0b\x65xpressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0b\x65xpressions"p\n\x06\x46ilter\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x37\n\tcondition\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\tcondition"\x95\x05\n\x04Join\x12+\n\x04left\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x04left\x12-\n\x05right\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\x05right\x12@\n\x0ejoin_condition\x18\x03 \x01(\x0b\x32\x19.spark.connect.ExpressionR\rjoinCondition\x12\x39\n\tjoin_type\x18\x04 \x01(\x0e\x32\x1c.spark.connect.Join.JoinTypeR\x08joinType\x12#\n\rusing_columns\x18\x05 \x03(\tR\x0cusingColumns\x12K\n\x0ejoin_data_type\x18\x06 \x01(\x0b\x32 .spark.connect.Join.JoinDataTypeH\x00R\x0cjoinDataType\x88\x01\x01\x1a\\\n\x0cJoinDataType\x12$\n\x0eis_left_struct\x18\x01 \x01(\x08R\x0cisLeftStruct\x12&\n\x0fis_right_struct\x18\x02 \x01(\x08R\risRightStruct"\xd0\x01\n\x08JoinType\x12\x19\n\x15JOIN_TYPE_UNSPECIFIED\x10\x00\x12\x13\n\x0fJOIN_TYPE_INNER\x10\x01\x12\x18\n\x14JOIN_TYPE_FULL_OUTER\x10\x02\x12\x18\n\x14JOIN_TYPE_LEFT_OUTER\x10\x03\x12\x19\n\x15JOIN_TYPE_RIGHT_OUTER\x10\x04\x12\x17\n\x13JOIN_TYPE_LEFT_ANTI\x10\x05\x12\x17\n\x13JOIN_TYPE_LEFT_SEMI\x10\x06\x12\x13\n\x0fJOIN_TYPE_CROSS\x10\x07\x42\x11\n\x0f_join_data_type"\xdf\x03\n\x0cSetOperation\x12\x36\n\nleft_input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\tleftInput\x12\x38\n\x0bright_input\x18\x02 \x01(\x0b\x32\x17.spark.connect.RelationR\nrightInput\x12\x45\n\x0bset_op_type\x18\x03 \x01(\x0e\x32%.spark.connect.SetOperation.SetOpTypeR\tsetOpType\x12\x1a\n\x06is_all\x18\x04 \x01(\x08H\x00R\x05isAll\x88\x01\x01\x12\x1c\n\x07\x62y_name\x18\x05 \x01(\x08H\x01R\x06\x62yName\x88\x01\x01\x12\x37\n\x15\x61llow_missing_columns\x18\x06 \x01(\x08H\x02R\x13\x61llowMissingColumns\x88\x01\x01"r\n\tSetOpType\x12\x1b\n\x17SET_OP_TYPE_UNSPECIFIED\x10\x00\x12\x19\n\x15SET_OP_TYPE_INTERSECT\x10\x01\x12\x15\n\x11SET_OP_TYPE_UNION\x10\x02\x12\x16\n\x12SET_OP_TYPE_EXCEPT\x10\x03\x42\t\n\x07_is_allB\n\n\x08_by_nameB\x18\n\x16_allow_missing_columns"L\n\x05Limit\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"O\n\x06Offset\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x16\n\x06offset\x18\x02 \x01(\x05R\x06offset"K\n\x04Tail\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05limit\x18\x02 \x01(\x05R\x05limit"\xc6\x04\n\tAggregate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x41\n\ngroup_type\x18\x02 \x01(\x0e\x32".spark.connect.Aggregate.GroupTypeR\tgroupType\x12L\n\x14grouping_expressions\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12N\n\x15\x61ggregate_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x14\x61ggregateExpressions\x12\x34\n\x05pivot\x18\x05 \x01(\x0b\x32\x1e.spark.connect.Aggregate.PivotR\x05pivot\x1ao\n\x05Pivot\x12+\n\x03\x63ol\x18\x01 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x39\n\x06values\x18\x02 
\x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x81\x01\n\tGroupType\x12\x1a\n\x16GROUP_TYPE_UNSPECIFIED\x10\x00\x12\x16\n\x12GROUP_TYPE_GROUPBY\x10\x01\x12\x15\n\x11GROUP_TYPE_ROLLUP\x10\x02\x12\x13\n\x0fGROUP_TYPE_CUBE\x10\x03\x12\x14\n\x10GROUP_TYPE_PIVOT\x10\x04"\xa0\x01\n\x04Sort\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x05order\x18\x02 \x03(\x0b\x32#.spark.connect.Expression.SortOrderR\x05order\x12 \n\tis_global\x18\x03 \x01(\x08H\x00R\x08isGlobal\x88\x01\x01\x42\x0c\n\n_is_global"\x8d\x01\n\x04\x44rop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x33\n\x07\x63olumns\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07\x63olumns\x12!\n\x0c\x63olumn_names\x18\x03 \x03(\tR\x0b\x63olumnNames"\xf0\x01\n\x0b\x44\x65\x64uplicate\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames\x12\x32\n\x13\x61ll_columns_as_keys\x18\x03 \x01(\x08H\x00R\x10\x61llColumnsAsKeys\x88\x01\x01\x12.\n\x10within_watermark\x18\x04 \x01(\x08H\x01R\x0fwithinWatermark\x88\x01\x01\x42\x16\n\x14_all_columns_as_keysB\x13\n\x11_within_watermark"Y\n\rLocalRelation\x12\x17\n\x04\x64\x61ta\x18\x01 \x01(\x0cH\x00R\x04\x64\x61ta\x88\x01\x01\x12\x1b\n\x06schema\x18\x02 \x01(\tH\x01R\x06schema\x88\x01\x01\x42\x07\n\x05_dataB\t\n\x07_schema"H\n\x13\x43\x61\x63hedLocalRelation\x12\x12\n\x04hash\x18\x03 \x01(\tR\x04hashJ\x04\x08\x01\x10\x02J\x04\x08\x02\x10\x03R\x06userIdR\tsessionId"7\n\x14\x43\x61\x63hedRemoteRelation\x12\x1f\n\x0brelation_id\x18\x01 \x01(\tR\nrelationId"\x91\x02\n\x06Sample\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1f\n\x0blower_bound\x18\x02 \x01(\x01R\nlowerBound\x12\x1f\n\x0bupper_bound\x18\x03 \x01(\x01R\nupperBound\x12.\n\x10with_replacement\x18\x04 \x01(\x08H\x00R\x0fwithReplacement\x88\x01\x01\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x01R\x04seed\x88\x01\x01\x12/\n\x13\x64\x65terministic_order\x18\x06 \x01(\x08R\x12\x64\x65terministicOrderB\x13\n\x11_with_replacementB\x07\n\x05_seed"\x91\x01\n\x05Range\x12\x19\n\x05start\x18\x01 \x01(\x03H\x00R\x05start\x88\x01\x01\x12\x10\n\x03\x65nd\x18\x02 \x01(\x03R\x03\x65nd\x12\x12\n\x04step\x18\x03 \x01(\x03R\x04step\x12*\n\x0enum_partitions\x18\x04 \x01(\x05H\x01R\rnumPartitions\x88\x01\x01\x42\x08\n\x06_startB\x11\n\x0f_num_partitions"r\n\rSubqueryAlias\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x14\n\x05\x61lias\x18\x02 \x01(\tR\x05\x61lias\x12\x1c\n\tqualifier\x18\x03 \x03(\tR\tqualifier"\x8e\x01\n\x0bRepartition\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12%\n\x0enum_partitions\x18\x02 \x01(\x05R\rnumPartitions\x12\x1d\n\x07shuffle\x18\x03 \x01(\x08H\x00R\x07shuffle\x88\x01\x01\x42\n\n\x08_shuffle"\x8e\x01\n\nShowString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate\x12\x1a\n\x08vertical\x18\x04 \x01(\x08R\x08vertical"r\n\nHtmlString\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x19\n\x08num_rows\x18\x02 \x01(\x05R\x07numRows\x12\x1a\n\x08truncate\x18\x03 \x01(\x05R\x08truncate"\\\n\x0bStatSummary\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1e\n\nstatistics\x18\x02 \x03(\tR\nstatistics"Q\n\x0cStatDescribe\x12-\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols"e\n\x0cStatCrosstab\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"`\n\x07StatCov\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2"\x89\x01\n\x08StatCorr\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ol1\x18\x02 \x01(\tR\x04\x63ol1\x12\x12\n\x04\x63ol2\x18\x03 \x01(\tR\x04\x63ol2\x12\x1b\n\x06method\x18\x04 \x01(\tH\x00R\x06method\x88\x01\x01\x42\t\n\x07_method"\xa4\x01\n\x12StatApproxQuantile\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12$\n\rprobabilities\x18\x03 \x03(\x01R\rprobabilities\x12%\n\x0erelative_error\x18\x04 \x01(\x01R\rrelativeError"}\n\rStatFreqItems\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x1d\n\x07support\x18\x03 \x01(\x01H\x00R\x07support\x88\x01\x01\x42\n\n\x08_support"\xb5\x02\n\x0cStatSampleBy\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03\x63ol\x18\x02 \x01(\x0b\x32\x19.spark.connect.ExpressionR\x03\x63ol\x12\x42\n\tfractions\x18\x03 \x03(\x0b\x32$.spark.connect.StatSampleBy.FractionR\tfractions\x12\x17\n\x04seed\x18\x05 \x01(\x03H\x00R\x04seed\x88\x01\x01\x1a\x63\n\x08\x46raction\x12;\n\x07stratum\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x07stratum\x12\x1a\n\x08\x66raction\x18\x02 \x01(\x01R\x08\x66ractionB\x07\n\x05_seed"\x86\x01\n\x06NAFill\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\x39\n\x06values\x18\x03 \x03(\x0b\x32!.spark.connect.Expression.LiteralR\x06values"\x86\x01\n\x06NADrop\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12\'\n\rmin_non_nulls\x18\x03 \x01(\x05H\x00R\x0bminNonNulls\x88\x01\x01\x42\x10\n\x0e_min_non_nulls"\xa8\x02\n\tNAReplace\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04\x63ols\x18\x02 \x03(\tR\x04\x63ols\x12H\n\x0creplacements\x18\x03 \x03(\x0b\x32$.spark.connect.NAReplace.ReplacementR\x0creplacements\x1a\x8d\x01\n\x0bReplacement\x12>\n\told_value\x18\x01 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08oldValue\x12>\n\tnew_value\x18\x02 \x01(\x0b\x32!.spark.connect.Expression.LiteralR\x08newValue"X\n\x04ToDF\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12!\n\x0c\x63olumn_names\x18\x02 \x03(\tR\x0b\x63olumnNames"\xef\x01\n\x12WithColumnsRenamed\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x65\n\x12rename_columns_map\x18\x02 \x03(\x0b\x32\x37.spark.connect.WithColumnsRenamed.RenameColumnsMapEntryR\x10renameColumnsMap\x1a\x43\n\x15RenameColumnsMapEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"w\n\x0bWithColumns\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x39\n\x07\x61liases\x18\x02 \x03(\x0b\x32\x1f.spark.connect.Expression.AliasR\x07\x61liases"\x86\x01\n\rWithWatermark\x12-\n\x05input\x18\x01 
\x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x1d\n\nevent_time\x18\x02 \x01(\tR\teventTime\x12\'\n\x0f\x64\x65lay_threshold\x18\x03 \x01(\tR\x0e\x64\x65layThreshold"\x84\x01\n\x04Hint\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x39\n\nparameters\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\nparameters"\xc7\x02\n\x07Unpivot\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12+\n\x03ids\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x03ids\x12:\n\x06values\x18\x03 \x01(\x0b\x32\x1d.spark.connect.Unpivot.ValuesH\x00R\x06values\x88\x01\x01\x12\x30\n\x14variable_column_name\x18\x04 \x01(\tR\x12variableColumnName\x12*\n\x11value_column_name\x18\x05 \x01(\tR\x0fvalueColumnName\x1a;\n\x06Values\x12\x31\n\x06values\x18\x01 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x06valuesB\t\n\x07_values"j\n\x08ToSchema\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12/\n\x06schema\x18\x02 \x01(\x0b\x32\x17.spark.connect.DataTypeR\x06schema"\xcb\x01\n\x17RepartitionByExpression\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x0fpartition_exprs\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x0epartitionExprs\x12*\n\x0enum_partitions\x18\x03 \x01(\x05H\x00R\rnumPartitions\x88\x01\x01\x42\x11\n\x0f_num_partitions"\xb5\x01\n\rMapPartitions\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x42\n\x04\x66unc\x18\x02 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12"\n\nis_barrier\x18\x03 \x01(\x08H\x00R\tisBarrier\x88\x01\x01\x42\r\n\x0b_is_barrier"\xfb\x04\n\x08GroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12J\n\x13sorting_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x12sortingExpressions\x12<\n\rinitial_input\x18\x05 \x01(\x0b\x32\x17.spark.connect.RelationR\x0cinitialInput\x12[\n\x1cinitial_grouping_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x1ainitialGroupingExpressions\x12;\n\x18is_map_groups_with_state\x18\x07 \x01(\x08H\x00R\x14isMapGroupsWithState\x88\x01\x01\x12$\n\x0boutput_mode\x18\x08 \x01(\tH\x01R\noutputMode\x88\x01\x01\x12&\n\x0ctimeout_conf\x18\t \x01(\tH\x02R\x0btimeoutConf\x88\x01\x01\x42\x1b\n\x19_is_map_groups_with_stateB\x0e\n\x0c_output_modeB\x0f\n\r_timeout_conf"\x8e\x04\n\nCoGroupMap\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12W\n\x1ainput_grouping_expressions\x18\x02 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18inputGroupingExpressions\x12-\n\x05other\x18\x03 \x01(\x0b\x32\x17.spark.connect.RelationR\x05other\x12W\n\x1aother_grouping_expressions\x18\x04 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x18otherGroupingExpressions\x12\x42\n\x04\x66unc\x18\x05 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12U\n\x19input_sorting_expressions\x18\x06 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17inputSortingExpressions\x12U\n\x19other_sorting_expressions\x18\x07 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x17otherSortingExpressions"\xe5\x02\n\x16\x41pplyInPandasWithState\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12L\n\x14grouping_expressions\x18\x02 
\x03(\x0b\x32\x19.spark.connect.ExpressionR\x13groupingExpressions\x12\x42\n\x04\x66unc\x18\x03 \x01(\x0b\x32..spark.connect.CommonInlineUserDefinedFunctionR\x04\x66unc\x12#\n\routput_schema\x18\x04 \x01(\tR\x0coutputSchema\x12!\n\x0cstate_schema\x18\x05 \x01(\tR\x0bstateSchema\x12\x1f\n\x0boutput_mode\x18\x06 \x01(\tR\noutputMode\x12!\n\x0ctimeout_conf\x18\x07 \x01(\tR\x0btimeoutConf"\xf4\x01\n$CommonInlineUserDefinedTableFunction\x12#\n\rfunction_name\x18\x01 \x01(\tR\x0c\x66unctionName\x12$\n\rdeterministic\x18\x02 \x01(\x08R\rdeterministic\x12\x37\n\targuments\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\targuments\x12<\n\x0bpython_udtf\x18\x04 \x01(\x0b\x32\x19.spark.connect.PythonUDTFH\x00R\npythonUdtfB\n\n\x08\x66unction"\xb1\x01\n\nPythonUDTF\x12=\n\x0breturn_type\x18\x01 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\nreturnType\x88\x01\x01\x12\x1b\n\teval_type\x18\x02 \x01(\x05R\x08\x65valType\x12\x18\n\x07\x63ommand\x18\x03 \x01(\x0cR\x07\x63ommand\x12\x1d\n\npython_ver\x18\x04 \x01(\tR\tpythonVerB\x0e\n\x0c_return_type"\x88\x01\n\x0e\x43ollectMetrics\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x12\n\x04name\x18\x02 \x01(\tR\x04name\x12\x33\n\x07metrics\x18\x03 \x03(\x0b\x32\x19.spark.connect.ExpressionR\x07metrics"\x84\x03\n\x05Parse\x12-\n\x05input\x18\x01 \x01(\x0b\x32\x17.spark.connect.RelationR\x05input\x12\x38\n\x06\x66ormat\x18\x02 \x01(\x0e\x32 .spark.connect.Parse.ParseFormatR\x06\x66ormat\x12\x34\n\x06schema\x18\x03 \x01(\x0b\x32\x17.spark.connect.DataTypeH\x00R\x06schema\x88\x01\x01\x12;\n\x07options\x18\x04 \x03(\x0b\x32!.spark.connect.Parse.OptionsEntryR\x07options\x1a:\n\x0cOptionsEntry\x12\x10\n\x03key\x18\x01 \x01(\tR\x03key\x12\x14\n\x05value\x18\x02 \x01(\tR\x05value:\x02\x38\x01"X\n\x0bParseFormat\x12\x1c\n\x18PARSE_FORMAT_UNSPECIFIED\x10\x00\x12\x14\n\x10PARSE_FORMAT_CSV\x10\x01\x12\x15\n\x11PARSE_FORMAT_JSON\x10\x02\x42\t\n\x07_schemaB6\n\x1eorg.apache.spark.connect.protoP\x01Z\x12internal/generatedb\x06proto3' ) _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) @@ -111,85 +111,85 @@ _LOCALRELATION._serialized_start = 7090 _LOCALRELATION._serialized_end = 7179 _CACHEDLOCALRELATION._serialized_start = 7181 - _CACHEDLOCALRELATION._serialized_end = 7276 - _CACHEDREMOTERELATION._serialized_start = 7278 - _CACHEDREMOTERELATION._serialized_end = 7333 - _SAMPLE._serialized_start = 7336 - _SAMPLE._serialized_end = 7609 - _RANGE._serialized_start = 7612 - _RANGE._serialized_end = 7757 - _SUBQUERYALIAS._serialized_start = 7759 - _SUBQUERYALIAS._serialized_end = 7873 - _REPARTITION._serialized_start = 7876 - _REPARTITION._serialized_end = 8018 - _SHOWSTRING._serialized_start = 8021 - _SHOWSTRING._serialized_end = 8163 - _HTMLSTRING._serialized_start = 8165 - _HTMLSTRING._serialized_end = 8279 - _STATSUMMARY._serialized_start = 8281 - _STATSUMMARY._serialized_end = 8373 - _STATDESCRIBE._serialized_start = 8375 - _STATDESCRIBE._serialized_end = 8456 - _STATCROSSTAB._serialized_start = 8458 - _STATCROSSTAB._serialized_end = 8559 - _STATCOV._serialized_start = 8561 - _STATCOV._serialized_end = 8657 - _STATCORR._serialized_start = 8660 - _STATCORR._serialized_end = 8797 - _STATAPPROXQUANTILE._serialized_start = 8800 - _STATAPPROXQUANTILE._serialized_end = 8964 - _STATFREQITEMS._serialized_start = 8966 - _STATFREQITEMS._serialized_end = 9091 - _STATSAMPLEBY._serialized_start = 9094 - _STATSAMPLEBY._serialized_end = 9403 - _STATSAMPLEBY_FRACTION._serialized_start = 9295 - _STATSAMPLEBY_FRACTION._serialized_end = 9394 - 
_NAFILL._serialized_start = 9406 - _NAFILL._serialized_end = 9540 - _NADROP._serialized_start = 9543 - _NADROP._serialized_end = 9677 - _NAREPLACE._serialized_start = 9680 - _NAREPLACE._serialized_end = 9976 - _NAREPLACE_REPLACEMENT._serialized_start = 9835 - _NAREPLACE_REPLACEMENT._serialized_end = 9976 - _TODF._serialized_start = 9978 - _TODF._serialized_end = 10066 - _WITHCOLUMNSRENAMED._serialized_start = 10069 - _WITHCOLUMNSRENAMED._serialized_end = 10308 - _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_start = 10241 - _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_end = 10308 - _WITHCOLUMNS._serialized_start = 10310 - _WITHCOLUMNS._serialized_end = 10429 - _WITHWATERMARK._serialized_start = 10432 - _WITHWATERMARK._serialized_end = 10566 - _HINT._serialized_start = 10569 - _HINT._serialized_end = 10701 - _UNPIVOT._serialized_start = 10704 - _UNPIVOT._serialized_end = 11031 - _UNPIVOT_VALUES._serialized_start = 10961 - _UNPIVOT_VALUES._serialized_end = 11020 - _TOSCHEMA._serialized_start = 11033 - _TOSCHEMA._serialized_end = 11139 - _REPARTITIONBYEXPRESSION._serialized_start = 11142 - _REPARTITIONBYEXPRESSION._serialized_end = 11345 - _MAPPARTITIONS._serialized_start = 11348 - _MAPPARTITIONS._serialized_end = 11529 - _GROUPMAP._serialized_start = 11532 - _GROUPMAP._serialized_end = 12167 - _COGROUPMAP._serialized_start = 12170 - _COGROUPMAP._serialized_end = 12696 - _APPLYINPANDASWITHSTATE._serialized_start = 12699 - _APPLYINPANDASWITHSTATE._serialized_end = 13056 - _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_start = 13059 - _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_end = 13303 - _PYTHONUDTF._serialized_start = 13306 - _PYTHONUDTF._serialized_end = 13483 - _COLLECTMETRICS._serialized_start = 13486 - _COLLECTMETRICS._serialized_end = 13622 - _PARSE._serialized_start = 13625 - _PARSE._serialized_end = 14013 + _CACHEDLOCALRELATION._serialized_end = 7253 + _CACHEDREMOTERELATION._serialized_start = 7255 + _CACHEDREMOTERELATION._serialized_end = 7310 + _SAMPLE._serialized_start = 7313 + _SAMPLE._serialized_end = 7586 + _RANGE._serialized_start = 7589 + _RANGE._serialized_end = 7734 + _SUBQUERYALIAS._serialized_start = 7736 + _SUBQUERYALIAS._serialized_end = 7850 + _REPARTITION._serialized_start = 7853 + _REPARTITION._serialized_end = 7995 + _SHOWSTRING._serialized_start = 7998 + _SHOWSTRING._serialized_end = 8140 + _HTMLSTRING._serialized_start = 8142 + _HTMLSTRING._serialized_end = 8256 + _STATSUMMARY._serialized_start = 8258 + _STATSUMMARY._serialized_end = 8350 + _STATDESCRIBE._serialized_start = 8352 + _STATDESCRIBE._serialized_end = 8433 + _STATCROSSTAB._serialized_start = 8435 + _STATCROSSTAB._serialized_end = 8536 + _STATCOV._serialized_start = 8538 + _STATCOV._serialized_end = 8634 + _STATCORR._serialized_start = 8637 + _STATCORR._serialized_end = 8774 + _STATAPPROXQUANTILE._serialized_start = 8777 + _STATAPPROXQUANTILE._serialized_end = 8941 + _STATFREQITEMS._serialized_start = 8943 + _STATFREQITEMS._serialized_end = 9068 + _STATSAMPLEBY._serialized_start = 9071 + _STATSAMPLEBY._serialized_end = 9380 + _STATSAMPLEBY_FRACTION._serialized_start = 9272 + _STATSAMPLEBY_FRACTION._serialized_end = 9371 + _NAFILL._serialized_start = 9383 + _NAFILL._serialized_end = 9517 + _NADROP._serialized_start = 9520 + _NADROP._serialized_end = 9654 + _NAREPLACE._serialized_start = 9657 + _NAREPLACE._serialized_end = 9953 + _NAREPLACE_REPLACEMENT._serialized_start = 9812 + _NAREPLACE_REPLACEMENT._serialized_end = 9953 + _TODF._serialized_start = 9955 + _TODF._serialized_end 
= 10043 + _WITHCOLUMNSRENAMED._serialized_start = 10046 + _WITHCOLUMNSRENAMED._serialized_end = 10285 + _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_start = 10218 + _WITHCOLUMNSRENAMED_RENAMECOLUMNSMAPENTRY._serialized_end = 10285 + _WITHCOLUMNS._serialized_start = 10287 + _WITHCOLUMNS._serialized_end = 10406 + _WITHWATERMARK._serialized_start = 10409 + _WITHWATERMARK._serialized_end = 10543 + _HINT._serialized_start = 10546 + _HINT._serialized_end = 10678 + _UNPIVOT._serialized_start = 10681 + _UNPIVOT._serialized_end = 11008 + _UNPIVOT_VALUES._serialized_start = 10938 + _UNPIVOT_VALUES._serialized_end = 10997 + _TOSCHEMA._serialized_start = 11010 + _TOSCHEMA._serialized_end = 11116 + _REPARTITIONBYEXPRESSION._serialized_start = 11119 + _REPARTITIONBYEXPRESSION._serialized_end = 11322 + _MAPPARTITIONS._serialized_start = 11325 + _MAPPARTITIONS._serialized_end = 11506 + _GROUPMAP._serialized_start = 11509 + _GROUPMAP._serialized_end = 12144 + _COGROUPMAP._serialized_start = 12147 + _COGROUPMAP._serialized_end = 12673 + _APPLYINPANDASWITHSTATE._serialized_start = 12676 + _APPLYINPANDASWITHSTATE._serialized_end = 13033 + _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_start = 13036 + _COMMONINLINEUSERDEFINEDTABLEFUNCTION._serialized_end = 13280 + _PYTHONUDTF._serialized_start = 13283 + _PYTHONUDTF._serialized_end = 13460 + _COLLECTMETRICS._serialized_start = 13463 + _COLLECTMETRICS._serialized_end = 13599 + _PARSE._serialized_start = 13602 + _PARSE._serialized_end = 13990 _PARSE_OPTIONSENTRY._serialized_start = 3987 _PARSE_OPTIONSENTRY._serialized_end = 4045 - _PARSE_PARSEFORMAT._serialized_start = 13914 - _PARSE_PARSEFORMAT._serialized_end = 14002 + _PARSE_PARSEFORMAT._serialized_start = 13891 + _PARSE_PARSEFORMAT._serialized_end = 13979 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/connect/proto/relations_pb2.pyi b/python/pyspark/sql/connect/proto/relations_pb2.pyi index 9cadd4acc5224..007b92ef5f42d 100644 --- a/python/pyspark/sql/connect/proto/relations_pb2.pyi +++ b/python/pyspark/sql/connect/proto/relations_pb2.pyi @@ -1647,28 +1647,15 @@ class CachedLocalRelation(google.protobuf.message.Message): DESCRIPTOR: google.protobuf.descriptor.Descriptor - USERID_FIELD_NUMBER: builtins.int - SESSIONID_FIELD_NUMBER: builtins.int HASH_FIELD_NUMBER: builtins.int - userId: builtins.str - """(Required) An identifier of the user which created the local relation""" - sessionId: builtins.str - """(Required) An identifier of the Spark SQL session in which the user created the local relation.""" hash: builtins.str """(Required) A sha-256 hash of the serialized local relation in proto, see LocalRelation.""" def __init__( self, *, - userId: builtins.str = ..., - sessionId: builtins.str = ..., hash: builtins.str = ..., ) -> None: ... - def ClearField( - self, - field_name: typing_extensions.Literal[ - "hash", b"hash", "sessionId", b"sessionId", "userId", b"userId" - ], - ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["hash", b"hash"]) -> None: ... global___CachedLocalRelation = CachedLocalRelation From 6a2aa1d48c304095dcdf2816a46ec1f5a8af41a2 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 12 Sep 2023 00:29:38 -0700 Subject: [PATCH 005/521] [SPARK-45110][BUILD] Upgrade rocksdbjni to 8.5.3 ### What changes were proposed in this pull request? This pr aims to upgrade rocksdbjni from 8.3.2 to 8.5.3. ### Why are the changes needed? 
1.The full release notes: - https://github.com/facebook/rocksdb/releases/tag/v8.5.3 - https://github.com/facebook/rocksdb/releases/tag/v8.4.4 - https://github.com/facebook/rocksdb/releases/tag/v8.3.3 2.Bug Fixes: image - Fix a bug where if there is an error reading from offset 0 of a file from L1+ and that the file is not the first file in the sorted run, data can be lost in compaction and read/scan can return incorrect results. - Fix a bug where iterator may return incorrect result for DeleteRange() users if there was an error reading from a file. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Pass GA. - Manually test: ``` ./build/mvn clean install -pl core -am -Dtest.exclude.tags=org.apache.spark.tags.ExtendedLevelDBTest -fn ... [INFO] ------------------------------------------------------------------------ [INFO] Reactor Summary for Spark Project Parent POM 4.0.0-SNAPSHOT: [INFO] [INFO] Spark Project Parent POM ........................... SUCCESS [ 7.121 s] [INFO] Spark Project Tags ................................. SUCCESS [ 10.181 s] [INFO] Spark Project Local DB ............................. SUCCESS [ 21.153 s] [INFO] Spark Project Common Utils ......................... SUCCESS [ 14.960 s] [INFO] Spark Project Networking ........................... SUCCESS [01:01 min] [INFO] Spark Project Shuffle Streaming Service ............ SUCCESS [ 16.992 s] [INFO] Spark Project Unsafe ............................... SUCCESS [ 14.967 s] [INFO] Spark Project Launcher ............................. SUCCESS [ 11.737 s] [INFO] Spark Project Core ................................. SUCCESS [38:06 min] [INFO] ------------------------------------------------------------------------ [INFO] BUILD SUCCESS [INFO] ------------------------------------------------------------------------ [INFO] Total time: 40:45 min [INFO] Finished at: 2023-09-10T17:25:26+08:00 [INFO] ------------------------------------------------------------------------ ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42862 from panbingkun/SPARK-45110. 
Authored-by: panbingkun Signed-off-by: Dongjoon Hyun (cherry picked from commit fa2bc21ba1e6cbde31f33faa681f5a1c47219c69) Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- ...BasicOperationsBenchmark-jdk11-results.txt | 120 +++++++++--------- ...BasicOperationsBenchmark-jdk17-results.txt | 120 +++++++++--------- ...eStoreBasicOperationsBenchmark-results.txt | 120 +++++++++--------- 5 files changed, 182 insertions(+), 182 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 1d02f8dba567e..3d3f710e74cc4 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -227,7 +227,7 @@ parquet-jackson/1.13.1//parquet-jackson-1.13.1.jar pickle/1.3//pickle-1.3.jar py4j/0.10.9.7//py4j-0.10.9.7.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar -rocksdbjni/8.3.2//rocksdbjni-8.3.2.jar +rocksdbjni/8.5.3//rocksdbjni-8.5.3.jar scala-collection-compat_2.12/2.7.0//scala-collection-compat_2.12-2.7.0.jar scala-compiler/2.12.18//scala-compiler-2.12.18.jar scala-library/2.12.18//scala-library-2.12.18.jar diff --git a/pom.xml b/pom.xml index 8fc4b89a78cc2..70e1ee7156855 100644 --- a/pom.xml +++ b/pom.xml @@ -679,7 +679,7 @@ org.rocksdb rocksdbjni - 8.3.2 + 8.5.3 ${leveldbjni.group} diff --git a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk11-results.txt b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk11-results.txt index d5c175a320d3f..70e9849572c51 100644 --- a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk11-results.txt @@ -2,110 +2,110 @@ put rows ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 11 2 1.1 872.7 1.0X -RocksDB (trackTotalNumberOfRows: true) 61 63 1 0.2 6148.5 0.1X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2108.9 0.4X +In-memory 8 9 1 1.3 770.7 1.0X +RocksDB (trackTotalNumberOfRows: true) 62 63 1 0.2 6174.3 0.1X +RocksDB (trackTotalNumberOfRows: false) 22 23 1 0.5 2220.7 0.3X -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 10 1 1.1 872.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 51 53 1 0.2 5134.7 0.2X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2149.6 0.4X +In-memory 8 9 1 1.3 781.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 52 53 1 0.2 5196.0 0.2X +RocksDB (trackTotalNumberOfRows: false) 22 24 1 0.4 2230.3 0.4X -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 
11.0.20.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 10 1 1.2 833.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 41 43 1 0.2 4128.6 0.2X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2114.3 0.4X +In-memory 7 8 1 1.3 747.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 42 44 1 0.2 4224.4 0.2X +RocksDB (trackTotalNumberOfRows: false) 22 23 1 0.4 2222.6 0.3X -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 1 1.2 812.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 39 40 1 0.3 3855.8 0.2X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2111.9 0.4X +In-memory 7 8 1 1.3 740.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 40 42 1 0.2 4019.6 0.2X +RocksDB (trackTotalNumberOfRows: false) 22 23 1 0.5 2201.9 0.3X ================================================================================================ delete rows ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(10000 rows are non-existing - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 1 1 0 15.7 63.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 39 41 0 0.3 3935.3 0.0X -RocksDB (trackTotalNumberOfRows: false) 22 22 0 0.5 2158.8 0.0X +In-memory 1 1 0 16.7 59.7 1.0X +RocksDB (trackTotalNumberOfRows: true) 40 41 1 0.3 3993.2 0.0X +RocksDB (trackTotalNumberOfRows: false) 22 23 0 0.5 2187.3 0.0X -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(5000 rows are non-existing - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 7 0 1.7 597.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 51 53 0 0.2 5120.0 0.1X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2068.2 0.3X +In-memory 6 6 0 1.8 562.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 50 52 1 0.2 5035.8 0.1X +RocksDB (trackTotalNumberOfRows: false) 22 22 1 0.5 2151.1 0.3X -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 
5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(1000 rows are non-existing - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 0 1.5 676.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 60 62 1 0.2 6040.0 0.1X -RocksDB (trackTotalNumberOfRows: false) 21 21 0 0.5 2067.2 0.3X +In-memory 6 7 1 1.6 634.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 59 60 1 0.2 5878.6 0.1X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2144.5 0.3X -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(0 rows are non-existing - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 0 1.5 684.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 62 64 1 0.2 6208.6 0.1X -RocksDB (trackTotalNumberOfRows: false) 20 21 0 0.5 2030.6 0.3X +In-memory 6 7 0 1.6 631.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 61 63 1 0.2 6103.6 0.1X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2112.1 0.3X ================================================================================================ evict rows ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 10000 rows (maxTimestampToEvictInMillis: 9999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 7 0 1.6 643.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 65 66 1 0.2 6454.6 0.1X -RocksDB (trackTotalNumberOfRows: false) 24 25 0 0.4 2379.3 0.3X +In-memory 6 7 0 1.6 635.3 1.0X +RocksDB (trackTotalNumberOfRows: true) 64 65 1 0.2 6371.6 0.1X +RocksDB (trackTotalNumberOfRows: false) 23 24 1 0.4 2346.4 0.3X -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 5000 rows (maxTimestampToEvictInMillis: 4999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------ -In-memory 6 6 0 1.8 568.7 1.0X -RocksDB (trackTotalNumberOfRows: true) 34 36 1 0.3 3383.3 0.2X -RocksDB (trackTotalNumberOfRows: false) 14 14 0 0.7 1390.8 0.4X +In-memory 6 6 0 1.8 570.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 34 36 1 0.3 3410.5 0.2X +RocksDB (trackTotalNumberOfRows: false) 14 15 0 0.7 1412.9 0.4X -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 
64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 1000 rows (maxTimestampToEvictInMillis: 999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 5 5 0 2.1 474.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 11 11 0 0.9 1082.8 0.4X -RocksDB (trackTotalNumberOfRows: false) 7 7 0 1.4 693.5 0.7X +In-memory 5 5 0 2.1 483.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 11 11 0 0.9 1092.6 0.4X +RocksDB (trackTotalNumberOfRows: false) 7 7 0 1.4 698.6 0.7X -OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz +OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 0 rows (maxTimestampToEvictInMillis: -1) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 1 1 0 14.8 67.4 1.0X -RocksDB (trackTotalNumberOfRows: true) 5 5 0 2.0 501.7 0.1X -RocksDB (trackTotalNumberOfRows: false) 5 5 0 2.0 502.3 0.1X +In-memory 1 1 0 15.7 63.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 5 5 0 2.0 499.7 0.1X +RocksDB (trackTotalNumberOfRows: false) 5 5 0 2.0 498.3 0.1X diff --git a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk17-results.txt b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk17-results.txt index 5ec60f5beb90e..5424a55cd2012 100644 --- a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk17-results.txt +++ b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk17-results.txt @@ -2,110 +2,110 @@ put rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz putting 10000 rows (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 9 1 1.2 852.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 60 62 2 0.2 6009.7 0.1X -RocksDB (trackTotalNumberOfRows: false) 21 23 1 0.5 2139.2 0.4X +In-memory 11 15 2 0.9 1067.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 78 85 3 0.1 7772.1 0.1X +RocksDB (trackTotalNumberOfRows: false) 24 29 2 0.4 2384.6 0.4X -OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz putting 10000 rows (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 10 1 1.2 861.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 50 52 1 0.2 5032.7 0.2X -RocksDB (trackTotalNumberOfRows: false) 21 23 0 0.5 2110.3 0.4X +In-memory 10 14 2 1.0 984.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 62 70 3 0.2 6190.3 0.2X +RocksDB 
(trackTotalNumberOfRows: false) 25 29 2 0.4 2467.9 0.4X -OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz putting 10000 rows (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 1 1.2 822.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 40 42 1 0.2 4043.3 0.2X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2070.5 0.4X +In-memory 9 13 2 1.1 949.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 48 56 3 0.2 4829.3 0.2X +RocksDB (trackTotalNumberOfRows: false) 24 28 2 0.4 2373.0 0.4X -OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz putting 10000 rows (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 1 1.2 802.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 38 39 1 0.3 3773.1 0.2X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2050.8 0.4X +In-memory 10 13 2 1.0 992.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 43 51 2 0.2 4331.3 0.2X +RocksDB (trackTotalNumberOfRows: false) 24 28 2 0.4 2372.8 0.4X ================================================================================================ delete rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz trying to delete 10000 rows from 10000 rows(10000 rows are non-existing - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 1 1 0 17.3 57.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 39 40 0 0.3 3903.8 0.0X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2145.7 0.0X +In-memory 1 1 0 17.2 58.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 43 50 3 0.2 4300.0 0.0X +RocksDB (trackTotalNumberOfRows: false) 23 27 2 0.4 2328.2 0.0X -OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz trying to delete 10000 rows from 10000 rows(5000 rows are non-existing - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 7 0 1.6 639.5 1.0X -RocksDB (trackTotalNumberOfRows: true) 50 51 1 0.2 4996.4 0.1X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2136.3 0.3X +In-memory 7 10 2 1.4 722.7 1.0X +RocksDB (trackTotalNumberOfRows: true) 62 68 3 0.2 6157.8 0.1X +RocksDB (trackTotalNumberOfRows: false) 23 27 2 0.4 2303.3 0.3X 
-OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz trying to delete 10000 rows from 10000 rows(1000 rows are non-existing - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 0 1.5 688.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 58 59 1 0.2 5769.8 0.1X -RocksDB (trackTotalNumberOfRows: false) 21 22 1 0.5 2111.7 0.3X +In-memory 8 11 2 1.2 829.5 1.0X +RocksDB (trackTotalNumberOfRows: true) 71 80 3 0.1 7110.1 0.1X +RocksDB (trackTotalNumberOfRows: false) 23 26 2 0.4 2259.8 0.4X -OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz trying to delete 10000 rows from 10000 rows(0 rows are non-existing - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 0 1.4 706.4 1.0X -RocksDB (trackTotalNumberOfRows: true) 60 61 0 0.2 6012.9 0.1X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2135.2 0.3X +In-memory 8 11 2 1.2 806.5 1.0X +RocksDB (trackTotalNumberOfRows: true) 74 83 4 0.1 7354.0 0.1X +RocksDB (trackTotalNumberOfRows: false) 22 27 2 0.5 2197.1 0.4X ================================================================================================ evict rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz evicting 10000 rows (maxTimestampToEvictInMillis: 9999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 0 1.4 724.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 61 62 1 0.2 6107.3 0.1X -RocksDB (trackTotalNumberOfRows: false) 23 24 0 0.4 2337.9 0.3X +In-memory 8 11 1 1.3 796.3 1.0X +RocksDB (trackTotalNumberOfRows: true) 77 88 3 0.1 7695.2 0.1X +RocksDB (trackTotalNumberOfRows: false) 23 27 2 0.4 2296.5 0.3X -OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz evicting 5000 rows (maxTimestampToEvictInMillis: 4999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------ -In-memory 7 7 0 1.5 657.7 1.0X -RocksDB (trackTotalNumberOfRows: true) 33 33 0 0.3 3266.8 0.2X -RocksDB (trackTotalNumberOfRows: false) 14 14 1 0.7 1366.6 0.5X +In-memory 7 10 1 1.4 740.5 1.0X +RocksDB (trackTotalNumberOfRows: true) 40 47 2 0.2 4010.7 0.2X +RocksDB (trackTotalNumberOfRows: false) 13 16 1 0.8 
1301.1 0.6X -OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz evicting 1000 rows (maxTimestampToEvictInMillis: 999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 6 0 1.8 560.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 10 11 0 1.0 1006.6 0.6X -RocksDB (trackTotalNumberOfRows: false) 6 6 0 1.6 630.5 0.9X +In-memory 6 8 1 1.6 624.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 12 15 1 0.8 1228.5 0.5X +RocksDB (trackTotalNumberOfRows: false) 7 9 1 1.5 669.8 0.9X -OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz evicting 0 rows (maxTimestampToEvictInMillis: -1) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 1 1 0 15.3 65.5 1.0X -RocksDB (trackTotalNumberOfRows: true) 4 5 0 2.3 431.5 0.2X -RocksDB (trackTotalNumberOfRows: false) 4 5 0 2.3 431.9 0.2X +In-memory 1 1 0 15.8 63.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 5 6 1 2.0 508.5 0.1X +RocksDB (trackTotalNumberOfRows: false) 4 6 1 2.2 448.4 0.1X diff --git a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt index 7a9ef27a3884f..ae9f61457ca64 100644 --- a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt +++ b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt @@ -2,110 +2,110 @@ put rows ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 9 1 1.4 720.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 79 92 6 0.1 7934.4 0.1X -RocksDB (trackTotalNumberOfRows: false) 23 27 3 0.4 2263.3 0.3X +In-memory 8 8 1 1.3 759.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 59 60 1 0.2 5873.6 0.1X +RocksDB (trackTotalNumberOfRows: false) 20 21 1 0.5 2008.5 0.4X -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 12 2 1.2 831.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 65 77 5 0.2 6476.8 0.1X -RocksDB (trackTotalNumberOfRows: false) 22 28 2 0.4 2235.4 0.4X 
+In-memory 8 8 0 1.3 782.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 50 52 1 0.2 5004.2 0.2X +RocksDB (trackTotalNumberOfRows: false) 21 22 1 0.5 2079.1 0.4X -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 10 2 1.4 728.4 1.0X -RocksDB (trackTotalNumberOfRows: true) 47 57 4 0.2 4715.0 0.2X -RocksDB (trackTotalNumberOfRows: false) 22 27 2 0.5 2207.1 0.3X +In-memory 8 9 1 1.3 774.8 1.0X +RocksDB (trackTotalNumberOfRows: true) 41 42 1 0.2 4062.5 0.2X +RocksDB (trackTotalNumberOfRows: false) 21 21 0 0.5 2055.8 0.4X -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 10 1 1.3 750.4 1.0X -RocksDB (trackTotalNumberOfRows: true) 41 51 4 0.2 4116.2 0.2X -RocksDB (trackTotalNumberOfRows: false) 20 25 2 0.5 1962.6 0.4X +In-memory 7 8 0 1.4 719.3 1.0X +RocksDB (trackTotalNumberOfRows: true) 38 39 1 0.3 3830.2 0.2X +RocksDB (trackTotalNumberOfRows: false) 20 21 0 0.5 2021.5 0.4X ================================================================================================ delete rows ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(10000 rows are non-existing - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 0 1 0 24.5 40.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 42 50 4 0.2 4170.1 0.0X -RocksDB (trackTotalNumberOfRows: false) 19 24 2 0.5 1878.5 0.0X +In-memory 0 0 0 30.3 33.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 39 40 0 0.3 3855.9 0.0X +RocksDB (trackTotalNumberOfRows: false) 20 21 1 0.5 2006.9 0.0X -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(5000 rows are non-existing - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 8 1 1.7 578.7 1.0X -RocksDB (trackTotalNumberOfRows: true) 57 68 5 0.2 5697.0 0.1X -RocksDB (trackTotalNumberOfRows: false) 19 24 2 0.5 1861.9 0.3X +In-memory 6 6 1 1.8 569.0 
1.0X +RocksDB (trackTotalNumberOfRows: true) 48 49 1 0.2 4819.1 0.1X +RocksDB (trackTotalNumberOfRows: false) 20 21 0 0.5 1984.7 0.3X -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(1000 rows are non-existing - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 9 2 1.6 635.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 66 80 5 0.2 6605.7 0.1X -RocksDB (trackTotalNumberOfRows: false) 19 24 2 0.5 1861.2 0.3X +In-memory 6 7 0 1.6 626.5 1.0X +RocksDB (trackTotalNumberOfRows: true) 57 59 1 0.2 5701.7 0.1X +RocksDB (trackTotalNumberOfRows: false) 20 21 1 0.5 2006.4 0.3X -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(0 rows are non-existing - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 9 1 1.5 651.4 1.0X -RocksDB (trackTotalNumberOfRows: true) 71 83 5 0.1 7108.8 0.1X -RocksDB (trackTotalNumberOfRows: false) 18 24 2 0.6 1812.3 0.4X +In-memory 6 7 1 1.5 648.5 1.0X +RocksDB (trackTotalNumberOfRows: true) 59 61 1 0.2 5927.9 0.1X +RocksDB (trackTotalNumberOfRows: false) 20 21 0 0.5 1984.2 0.3X ================================================================================================ evict rows ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 10000 rows (maxTimestampToEvictInMillis: 9999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 9 1 1.6 619.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 74 86 5 0.1 7380.0 0.1X -RocksDB (trackTotalNumberOfRows: false) 22 27 2 0.4 2245.3 0.3X +In-memory 6 7 0 1.6 632.1 1.0X +RocksDB (trackTotalNumberOfRows: true) 59 61 1 0.2 5948.2 0.1X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2123.5 0.3X -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 5000 rows (maxTimestampToEvictInMillis: 4999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------ -In-memory 6 8 1 1.7 579.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 40 47 3 0.3 3985.4 0.1X -RocksDB (trackTotalNumberOfRows: false) 12 15 1 0.8 
1214.1 0.5X +In-memory 6 6 0 1.8 555.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 31 32 1 0.3 3136.7 0.2X +RocksDB (trackTotalNumberOfRows: false) 13 13 0 0.8 1252.6 0.4X -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 1000 rows (maxTimestampToEvictInMillis: 999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 5 6 1 2.1 470.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 11 13 1 0.9 1080.6 0.4X -RocksDB (trackTotalNumberOfRows: false) 6 7 1 1.8 560.1 0.8X +In-memory 5 5 0 2.2 463.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 9 10 0 1.1 937.6 0.5X +RocksDB (trackTotalNumberOfRows: false) 6 6 0 1.8 562.7 0.8X -OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 0 rows (maxTimestampToEvictInMillis: -1) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 1 1 0 18.4 54.3 1.0X -RocksDB (trackTotalNumberOfRows: true) 4 4 1 2.8 352.7 0.2X -RocksDB (trackTotalNumberOfRows: false) 3 4 1 3.0 328.0 0.2X +In-memory 1 1 0 17.2 58.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 4 4 0 2.8 359.0 0.2X +RocksDB (trackTotalNumberOfRows: false) 4 4 0 2.8 354.8 0.2X From ffa4127c774ea13b4d6bbcc82bc5a9bee23d7156 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Tue, 12 Sep 2023 16:16:04 +0200 Subject: [PATCH 006/521] [SPARK-45081][SQL] Encoders.bean does no longer work with read-only properties ### What changes were proposed in this pull request? This PR re-enables Encoders.bean to be called against beans having read-only properties, that is properties that have only getters and no setter method. Beans with read only properties are even used in internal tests. Setter methods of a Java bean encoder are stored within an Option wrapper because they are missing in case of read-only properties. When a java bean has to be initialized, setter methods for the bean properties have to be called: this PR filters out read-only properties from that process. ### Why are the changes needed? The changes are required to avoid an exception to the thrown by getting the value of a None option object. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? An additional regression test has been added ### Was this patch authored or co-authored using generative AI tooling? No Closes #42829 from gbloisi-openaire/SPARK-45081. 
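
For illustration, the filtering described above boils down to an `Option`-based guard over the bean's property metadata. The following minimal sketch shows the pattern in isolation; `Field` here is a hypothetical stand-in for the encoder's internal field descriptor, not Spark's real type.

```scala
// Hypothetical stand-in for the encoder's per-property metadata.
// Read-only properties have no setter, so writeMethod is None.
case class Field(name: String, writeMethod: Option[String])

object ReadOnlyPropertySketch {
  def main(args: Array[String]): Unit = {
    val fields = Seq(
      Field("empty", writeMethod = None),            // getter only, no setter
      Field("value", writeMethod = Some("setValue")) // regular read/write property
    )

    // Before the fix: mapping over all fields and calling writeMethod.get
    // throws NoSuchElementException as soon as a read-only property is hit.
    // After the fix: read-only properties are filtered out up front.
    val setters = fields
      .filter(_.writeMethod.isDefined)
      .map(f => f.name -> f.writeMethod.get)

    println(setters) // List((value,setValue))
  }
}
```
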
Authored-by: Giambattista Bloisi Signed-off-by: Herman van Hovell (cherry picked from commit d8298bffd91de01299f9456b37e4454e8b4a6ae8) Signed-off-by: Herman van Hovell --- .../client/arrow/ArrowDeserializer.scala | 20 ++++++++++--------- .../catalyst/DeserializerBuildHelper.scala | 4 +++- .../apache/spark/sql/JavaDatasetSuite.java | 17 ++++++++++++++++ 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala index cd54966ccf54d..9429578598712 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/arrow/ArrowDeserializer.scala @@ -332,15 +332,17 @@ object ArrowDeserializers { val constructor = methodLookup.findConstructor(tag.runtimeClass, MethodType.methodType(classOf[Unit])) val lookup = createFieldLookup(vectors) - val setters = fields.map { field => - val vector = lookup(field.name) - val deserializer = deserializerFor(field.enc, vector, timeZoneId) - val setter = methodLookup.findVirtual( - tag.runtimeClass, - field.writeMethod.get, - MethodType.methodType(classOf[Unit], field.enc.clsTag.runtimeClass)) - (bean: Any, i: Int) => setter.invoke(bean, deserializer.get(i)) - } + val setters = fields + .filter(_.writeMethod.isDefined) + .map { field => + val vector = lookup(field.name) + val deserializer = deserializerFor(field.enc, vector, timeZoneId) + val setter = methodLookup.findVirtual( + tag.runtimeClass, + field.writeMethod.get, + MethodType.methodType(classOf[Unit], field.enc.clsTag.runtimeClass)) + (bean: Any, i: Int) => setter.invoke(bean, deserializer.get(i)) + } new StructFieldSerializer[Any](struct) { def value(i: Int): Any = { val instance = constructor.invoke() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala index 16a7d7ff06526..0b88d5a4130e3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/DeserializerBuildHelper.scala @@ -390,7 +390,9 @@ object DeserializerBuildHelper { CreateExternalRow(convertedFields, enc.schema)) case JavaBeanEncoder(tag, fields) => - val setters = fields.map { f => + val setters = fields + .filter(_.writeMethod.isDefined) + .map { f => val newTypePath = walkedTypePath.recordField( f.enc.clsTag.runtimeClass.getName, f.name) diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java index 4f7cf8da78722..f416d411322ee 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaDatasetSuite.java @@ -1783,6 +1783,23 @@ public void testEmptyBean() { Assert.assertEquals(1, df.collectAsList().size()); } + public static class ReadOnlyPropertyBean implements Serializable { + public boolean isEmpty() { + return true; + } + } + + @Test + public void testReadOnlyPropertyBean() { + ReadOnlyPropertyBean bean = new ReadOnlyPropertyBean(); + List data = Arrays.asList(bean); + Dataset df = spark.createDataset(data, + Encoders.bean(ReadOnlyPropertyBean.class)); + 
Assert.assertEquals(1, df.schema().length()); + Assert.assertEquals(1, df.collectAsList().size()); + + } + public class CircularReference1Bean implements Serializable { private CircularReference2Bean child; From 6a2284feaac4f632d645a93361d29e693eeb9d32 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 12 Sep 2023 08:49:40 -0700 Subject: [PATCH 007/521] Revert "[SPARK-45110][BUILD] Upgrade rocksdbjni to 8.5.3" This reverts commit 6a2aa1d48c304095dcdf2816a46ec1f5a8af41a2. --- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- ...BasicOperationsBenchmark-jdk11-results.txt | 120 +++++++++--------- ...BasicOperationsBenchmark-jdk17-results.txt | 120 +++++++++--------- ...eStoreBasicOperationsBenchmark-results.txt | 120 +++++++++--------- 5 files changed, 182 insertions(+), 182 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 3d3f710e74cc4..1d02f8dba567e 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -227,7 +227,7 @@ parquet-jackson/1.13.1//parquet-jackson-1.13.1.jar pickle/1.3//pickle-1.3.jar py4j/0.10.9.7//py4j-0.10.9.7.jar remotetea-oncrpc/1.1.2//remotetea-oncrpc-1.1.2.jar -rocksdbjni/8.5.3//rocksdbjni-8.5.3.jar +rocksdbjni/8.3.2//rocksdbjni-8.3.2.jar scala-collection-compat_2.12/2.7.0//scala-collection-compat_2.12-2.7.0.jar scala-compiler/2.12.18//scala-compiler-2.12.18.jar scala-library/2.12.18//scala-library-2.12.18.jar diff --git a/pom.xml b/pom.xml index 70e1ee7156855..8fc4b89a78cc2 100644 --- a/pom.xml +++ b/pom.xml @@ -679,7 +679,7 @@ org.rocksdb rocksdbjni - 8.5.3 + 8.3.2 ${leveldbjni.group} diff --git a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk11-results.txt b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk11-results.txt index 70e9849572c51..d5c175a320d3f 100644 --- a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk11-results.txt +++ b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk11-results.txt @@ -2,110 +2,110 @@ put rows ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz putting 10000 rows (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 1 1.3 770.7 1.0X -RocksDB (trackTotalNumberOfRows: true) 62 63 1 0.2 6174.3 0.1X -RocksDB (trackTotalNumberOfRows: false) 22 23 1 0.5 2220.7 0.3X +In-memory 9 11 2 1.1 872.7 1.0X +RocksDB (trackTotalNumberOfRows: true) 61 63 1 0.2 6148.5 0.1X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2108.9 0.4X -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz putting 10000 rows (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 1 1.3 781.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 52 53 1 0.2 5196.0 0.2X -RocksDB (trackTotalNumberOfRows: 
false) 22 24 1 0.4 2230.3 0.4X +In-memory 9 10 1 1.1 872.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 51 53 1 0.2 5134.7 0.2X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2149.6 0.4X -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz putting 10000 rows (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 1 1.3 747.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 42 44 1 0.2 4224.4 0.2X -RocksDB (trackTotalNumberOfRows: false) 22 23 1 0.4 2222.6 0.3X +In-memory 8 10 1 1.2 833.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 41 43 1 0.2 4128.6 0.2X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2114.3 0.4X -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz putting 10000 rows (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 1 1.3 740.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 40 42 1 0.2 4019.6 0.2X -RocksDB (trackTotalNumberOfRows: false) 22 23 1 0.5 2201.9 0.3X +In-memory 8 9 1 1.2 812.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 39 40 1 0.3 3855.8 0.2X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2111.9 0.4X ================================================================================================ delete rows ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz trying to delete 10000 rows from 10000 rows(10000 rows are non-existing - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 1 1 0 16.7 59.7 1.0X -RocksDB (trackTotalNumberOfRows: true) 40 41 1 0.3 3993.2 0.0X -RocksDB (trackTotalNumberOfRows: false) 22 23 0 0.5 2187.3 0.0X +In-memory 1 1 0 15.7 63.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 39 41 0 0.3 3935.3 0.0X +RocksDB (trackTotalNumberOfRows: false) 22 22 0 0.5 2158.8 0.0X -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz trying to delete 10000 rows from 10000 rows(5000 rows are non-existing - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 6 0 1.8 562.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 50 52 1 0.2 5035.8 0.1X -RocksDB (trackTotalNumberOfRows: false) 22 22 1 0.5 2151.1 0.3X 
+In-memory 6 7 0 1.7 597.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 51 53 0 0.2 5120.0 0.1X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2068.2 0.3X -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz trying to delete 10000 rows from 10000 rows(1000 rows are non-existing - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 7 1 1.6 634.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 59 60 1 0.2 5878.6 0.1X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2144.5 0.3X +In-memory 7 8 0 1.5 676.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 60 62 1 0.2 6040.0 0.1X +RocksDB (trackTotalNumberOfRows: false) 21 21 0 0.5 2067.2 0.3X -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz trying to delete 10000 rows from 10000 rows(0 rows are non-existing - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 7 0 1.6 631.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 61 63 1 0.2 6103.6 0.1X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2112.1 0.3X +In-memory 7 8 0 1.5 684.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 62 64 1 0.2 6208.6 0.1X +RocksDB (trackTotalNumberOfRows: false) 20 21 0 0.5 2030.6 0.3X ================================================================================================ evict rows ================================================================================================ -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz evicting 10000 rows (maxTimestampToEvictInMillis: 9999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 7 0 1.6 635.3 1.0X -RocksDB (trackTotalNumberOfRows: true) 64 65 1 0.2 6371.6 0.1X -RocksDB (trackTotalNumberOfRows: false) 23 24 1 0.4 2346.4 0.3X +In-memory 6 7 0 1.6 643.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 65 66 1 0.2 6454.6 0.1X +RocksDB (trackTotalNumberOfRows: false) 24 25 0 0.4 2379.3 0.3X -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz evicting 5000 rows (maxTimestampToEvictInMillis: 4999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------ -In-memory 6 6 0 1.8 570.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 34 36 1 0.3 3410.5 0.2X -RocksDB 
(trackTotalNumberOfRows: false) 14 15 0 0.7 1412.9 0.4X +In-memory 6 6 0 1.8 568.7 1.0X +RocksDB (trackTotalNumberOfRows: true) 34 36 1 0.3 3383.3 0.2X +RocksDB (trackTotalNumberOfRows: false) 14 14 0 0.7 1390.8 0.4X -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz evicting 1000 rows (maxTimestampToEvictInMillis: 999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 5 5 0 2.1 483.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 11 11 0 0.9 1092.6 0.4X -RocksDB (trackTotalNumberOfRows: false) 7 7 0 1.4 698.6 0.7X +In-memory 5 5 0 2.1 474.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 11 11 0 0.9 1082.8 0.4X +RocksDB (trackTotalNumberOfRows: false) 7 7 0 1.4 693.5 0.7X -OpenJDK 64-Bit Server VM 11.0.20.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 11.0.19+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz evicting 0 rows (maxTimestampToEvictInMillis: -1) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 1 1 0 15.7 63.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 5 5 0 2.0 499.7 0.1X -RocksDB (trackTotalNumberOfRows: false) 5 5 0 2.0 498.3 0.1X +In-memory 1 1 0 14.8 67.4 1.0X +RocksDB (trackTotalNumberOfRows: true) 5 5 0 2.0 501.7 0.1X +RocksDB (trackTotalNumberOfRows: false) 5 5 0 2.0 502.3 0.1X diff --git a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk17-results.txt b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk17-results.txt index 5424a55cd2012..5ec60f5beb90e 100644 --- a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk17-results.txt +++ b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-jdk17-results.txt @@ -2,110 +2,110 @@ put rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -In-memory 11 15 2 0.9 1067.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 78 85 3 0.1 7772.1 0.1X -RocksDB (trackTotalNumberOfRows: false) 24 29 2 0.4 2384.6 0.4X +In-memory 9 9 1 1.2 852.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 60 62 2 0.2 6009.7 0.1X +RocksDB (trackTotalNumberOfRows: false) 21 23 1 0.5 2139.2 0.4X -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
------------------------------------------------------------------------------------------------------------------------------------- -In-memory 10 14 2 1.0 984.8 1.0X -RocksDB (trackTotalNumberOfRows: true) 62 70 3 0.2 6190.3 0.2X -RocksDB (trackTotalNumberOfRows: false) 25 29 2 0.4 2467.9 0.4X +In-memory 9 10 1 1.2 861.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 50 52 1 0.2 5032.7 0.2X +RocksDB (trackTotalNumberOfRows: false) 21 23 0 0.5 2110.3 0.4X -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 9 13 2 1.1 949.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 48 56 3 0.2 4829.3 0.2X -RocksDB (trackTotalNumberOfRows: false) 24 28 2 0.4 2373.0 0.4X +In-memory 8 9 1 1.2 822.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 40 42 1 0.2 4043.3 0.2X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2070.5 0.4X -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz putting 10000 rows (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -In-memory 10 13 2 1.0 992.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 43 51 2 0.2 4331.3 0.2X -RocksDB (trackTotalNumberOfRows: false) 24 28 2 0.4 2372.8 0.4X +In-memory 8 9 1 1.2 802.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 38 39 1 0.3 3773.1 0.2X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2050.8 0.4X ================================================================================================ delete rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(10000 rows are non-existing - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 1 1 0 17.2 58.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 43 50 3 0.2 4300.0 0.0X -RocksDB (trackTotalNumberOfRows: false) 23 27 2 0.4 2328.2 0.0X +In-memory 1 1 0 17.3 57.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 39 40 0 0.3 3903.8 0.0X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2145.7 0.0X -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(5000 rows are non-existing - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative 
----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 10 2 1.4 722.7 1.0X -RocksDB (trackTotalNumberOfRows: true) 62 68 3 0.2 6157.8 0.1X -RocksDB (trackTotalNumberOfRows: false) 23 27 2 0.4 2303.3 0.3X +In-memory 6 7 0 1.6 639.5 1.0X +RocksDB (trackTotalNumberOfRows: true) 50 51 1 0.2 4996.4 0.1X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2136.3 0.3X -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(1000 rows are non-existing - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 11 2 1.2 829.5 1.0X -RocksDB (trackTotalNumberOfRows: true) 71 80 3 0.1 7110.1 0.1X -RocksDB (trackTotalNumberOfRows: false) 23 26 2 0.4 2259.8 0.4X +In-memory 7 8 0 1.5 688.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 58 59 1 0.2 5769.8 0.1X +RocksDB (trackTotalNumberOfRows: false) 21 22 1 0.5 2111.7 0.3X -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz trying to delete 10000 rows from 10000 rows(0 rows are non-existing - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 11 2 1.2 806.5 1.0X -RocksDB (trackTotalNumberOfRows: true) 74 83 4 0.1 7354.0 0.1X -RocksDB (trackTotalNumberOfRows: false) 22 27 2 0.5 2197.1 0.4X +In-memory 7 8 0 1.4 706.4 1.0X +RocksDB (trackTotalNumberOfRows: true) 60 61 0 0.2 6012.9 0.1X +RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2135.2 0.3X ================================================================================================ evict rows ================================================================================================ -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 10000 rows (maxTimestampToEvictInMillis: 9999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 11 1 1.3 796.3 1.0X -RocksDB (trackTotalNumberOfRows: true) 77 88 3 0.1 7695.2 0.1X -RocksDB (trackTotalNumberOfRows: false) 23 27 2 0.4 2296.5 0.3X +In-memory 7 8 0 1.4 724.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 61 62 1 0.2 6107.3 0.1X +RocksDB (trackTotalNumberOfRows: false) 23 24 0 0.4 2337.9 0.3X -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 5000 rows (maxTimestampToEvictInMillis: 4999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) 
Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------ -In-memory 7 10 1 1.4 740.5 1.0X -RocksDB (trackTotalNumberOfRows: true) 40 47 2 0.2 4010.7 0.2X -RocksDB (trackTotalNumberOfRows: false) 13 16 1 0.8 1301.1 0.6X +In-memory 7 7 0 1.5 657.7 1.0X +RocksDB (trackTotalNumberOfRows: true) 33 33 0 0.3 3266.8 0.2X +RocksDB (trackTotalNumberOfRows: false) 14 14 1 0.7 1366.6 0.5X -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 1000 rows (maxTimestampToEvictInMillis: 999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 8 1 1.6 624.9 1.0X -RocksDB (trackTotalNumberOfRows: true) 12 15 1 0.8 1228.5 0.5X -RocksDB (trackTotalNumberOfRows: false) 7 9 1 1.5 669.8 0.9X +In-memory 6 6 0 1.8 560.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 10 11 0 1.0 1006.6 0.6X +RocksDB (trackTotalNumberOfRows: false) 6 6 0 1.6 630.5 0.9X -OpenJDK 64-Bit Server VM 17.0.8.1+1 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz +OpenJDK 64-Bit Server VM 17.0.7+7 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz evicting 0 rows (maxTimestampToEvictInMillis: -1) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 1 1 0 15.8 63.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 5 6 1 2.0 508.5 0.1X -RocksDB (trackTotalNumberOfRows: false) 4 6 1 2.2 448.4 0.1X +In-memory 1 1 0 15.3 65.5 1.0X +RocksDB (trackTotalNumberOfRows: true) 4 5 0 2.3 431.5 0.2X +RocksDB (trackTotalNumberOfRows: false) 4 5 0 2.3 431.9 0.2X diff --git a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt index ae9f61457ca64..7a9ef27a3884f 100644 --- a/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt +++ b/sql/core/benchmarks/StateStoreBasicOperationsBenchmark-results.txt @@ -2,110 +2,110 @@ put rows ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz putting 10000 rows (10000 rows to overwrite - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 8 1 1.3 759.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 59 60 1 0.2 5873.6 0.1X -RocksDB (trackTotalNumberOfRows: false) 20 21 1 0.5 2008.5 0.4X +In-memory 7 9 1 1.4 720.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 79 92 6 0.1 7934.4 0.1X +RocksDB (trackTotalNumberOfRows: false) 23 27 3 0.4 2263.3 0.3X -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on 
Linux 5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz putting 10000 rows (5000 rows to overwrite - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 8 0 1.3 782.8 1.0X -RocksDB (trackTotalNumberOfRows: true) 50 52 1 0.2 5004.2 0.2X -RocksDB (trackTotalNumberOfRows: false) 21 22 1 0.5 2079.1 0.4X +In-memory 8 12 2 1.2 831.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 65 77 5 0.2 6476.8 0.1X +RocksDB (trackTotalNumberOfRows: false) 22 28 2 0.4 2235.4 0.4X -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz putting 10000 rows (1000 rows to overwrite - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------- -In-memory 8 9 1 1.3 774.8 1.0X -RocksDB (trackTotalNumberOfRows: true) 41 42 1 0.2 4062.5 0.2X -RocksDB (trackTotalNumberOfRows: false) 21 21 0 0.5 2055.8 0.4X +In-memory 7 10 2 1.4 728.4 1.0X +RocksDB (trackTotalNumberOfRows: true) 47 57 4 0.2 4715.0 0.2X +RocksDB (trackTotalNumberOfRows: false) 22 27 2 0.5 2207.1 0.3X -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz putting 10000 rows (0 rows to overwrite - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative --------------------------------------------------------------------------------------------------------------------------------- -In-memory 7 8 0 1.4 719.3 1.0X -RocksDB (trackTotalNumberOfRows: true) 38 39 1 0.3 3830.2 0.2X -RocksDB (trackTotalNumberOfRows: false) 20 21 0 0.5 2021.5 0.4X +In-memory 8 10 1 1.3 750.4 1.0X +RocksDB (trackTotalNumberOfRows: true) 41 51 4 0.2 4116.2 0.2X +RocksDB (trackTotalNumberOfRows: false) 20 25 2 0.5 1962.6 0.4X ================================================================================================ delete rows ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz trying to delete 10000 rows from 10000 rows(10000 rows are non-existing - rate 100): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 0 0 0 30.3 33.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 39 40 0 0.3 3855.9 0.0X -RocksDB (trackTotalNumberOfRows: false) 20 21 1 0.5 2006.9 0.0X +In-memory 0 1 0 24.5 40.9 1.0X +RocksDB (trackTotalNumberOfRows: true) 42 50 4 0.2 4170.1 0.0X +RocksDB (trackTotalNumberOfRows: false) 19 24 2 0.5 1878.5 0.0X -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz trying to 
delete 10000 rows from 10000 rows(5000 rows are non-existing - rate 50): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 6 1 1.8 569.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 48 49 1 0.2 4819.1 0.1X -RocksDB (trackTotalNumberOfRows: false) 20 21 0 0.5 1984.7 0.3X +In-memory 6 8 1 1.7 578.7 1.0X +RocksDB (trackTotalNumberOfRows: true) 57 68 5 0.2 5697.0 0.1X +RocksDB (trackTotalNumberOfRows: false) 19 24 2 0.5 1861.9 0.3X -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz trying to delete 10000 rows from 10000 rows(1000 rows are non-existing - rate 10): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 7 0 1.6 626.5 1.0X -RocksDB (trackTotalNumberOfRows: true) 57 59 1 0.2 5701.7 0.1X -RocksDB (trackTotalNumberOfRows: false) 20 21 1 0.5 2006.4 0.3X +In-memory 6 9 2 1.6 635.0 1.0X +RocksDB (trackTotalNumberOfRows: true) 66 80 5 0.2 6605.7 0.1X +RocksDB (trackTotalNumberOfRows: false) 19 24 2 0.5 1861.2 0.3X -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz trying to delete 10000 rows from 10000 rows(0 rows are non-existing - rate 0): Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 7 1 1.5 648.5 1.0X -RocksDB (trackTotalNumberOfRows: true) 59 61 1 0.2 5927.9 0.1X -RocksDB (trackTotalNumberOfRows: false) 20 21 0 0.5 1984.2 0.3X +In-memory 7 9 1 1.5 651.4 1.0X +RocksDB (trackTotalNumberOfRows: true) 71 83 5 0.1 7108.8 0.1X +RocksDB (trackTotalNumberOfRows: false) 18 24 2 0.6 1812.3 0.4X ================================================================================================ evict rows ================================================================================================ -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz evicting 10000 rows (maxTimestampToEvictInMillis: 9999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 6 7 0 1.6 632.1 1.0X -RocksDB (trackTotalNumberOfRows: true) 59 61 1 0.2 5948.2 0.1X -RocksDB (trackTotalNumberOfRows: false) 21 22 0 0.5 2123.5 0.3X +In-memory 6 9 1 1.6 619.6 1.0X +RocksDB (trackTotalNumberOfRows: true) 74 86 5 0.1 7380.0 0.1X +RocksDB (trackTotalNumberOfRows: false) 22 27 2 0.4 2245.3 0.3X -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 
5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz evicting 5000 rows (maxTimestampToEvictInMillis: 4999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------------ -In-memory 6 6 0 1.8 555.2 1.0X -RocksDB (trackTotalNumberOfRows: true) 31 32 1 0.3 3136.7 0.2X -RocksDB (trackTotalNumberOfRows: false) 13 13 0 0.8 1252.6 0.4X +In-memory 6 8 1 1.7 579.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 40 47 3 0.3 3985.4 0.1X +RocksDB (trackTotalNumberOfRows: false) 12 15 1 0.8 1214.1 0.5X -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz evicting 1000 rows (maxTimestampToEvictInMillis: 999) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ----------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 5 5 0 2.2 463.6 1.0X -RocksDB (trackTotalNumberOfRows: true) 9 10 0 1.1 937.6 0.5X -RocksDB (trackTotalNumberOfRows: false) 6 6 0 1.8 562.7 0.8X +In-memory 5 6 1 2.1 470.2 1.0X +RocksDB (trackTotalNumberOfRows: true) 11 13 1 0.9 1080.6 0.4X +RocksDB (trackTotalNumberOfRows: false) 6 7 1 1.8 560.1 0.8X -OpenJDK 64-Bit Server VM 1.8.0_382-b05 on Linux 5.15.0-1045-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 1.8.0_372-b07 on Linux 5.15.0-1040-azure +Intel(R) Xeon(R) CPU E5-2673 v4 @ 2.30GHz evicting 0 rows (maxTimestampToEvictInMillis: -1) from 10000 rows: Best Time(ms) Avg Time(ms) Stdev(ms) Rate(M/s) Per Row(ns) Relative ------------------------------------------------------------------------------------------------------------------------------------------------- -In-memory 1 1 0 17.2 58.0 1.0X -RocksDB (trackTotalNumberOfRows: true) 4 4 0 2.8 359.0 0.2X -RocksDB (trackTotalNumberOfRows: false) 4 4 0 2.8 354.8 0.2X +In-memory 1 1 0 18.4 54.3 1.0X +RocksDB (trackTotalNumberOfRows: true) 4 4 1 2.8 352.7 0.2X +RocksDB (trackTotalNumberOfRows: false) 3 4 1 3.0 328.0 0.2X From 4e44d929005ac457fc853b256c02fd93f35fcceb Mon Sep 17 00:00:00 2001 From: Supun Nakandala Date: Tue, 12 Sep 2023 23:52:22 +0800 Subject: [PATCH 008/521] [SPARK-45117][SQL] Implement missing otherCopyArgs for the MultiCommutativeOp expression ### What changes were proposed in this pull request? - This PR implements the missing otherCopyArgs in the MultiCommutativeOp expression ### Why are the changes needed? - Without this method implementation, calling toJSON will throw an exception from the TreeNode::jsonFields method. - This is because the jsonFields method has an assertion that the number of fields defined in the constructor is equal to the number of field values (productIterator.toSeq ++ otherCopyArgs). - The originalRoot field of the MultiCommutativeOp is not part of the productIterator. Hence, it has to be explicitly set in the otherCopyArgs field. ### Does this PR introduce _any_ user-facing change? - No ### How was this patch tested? - Added unit test ### Was this patch authored or co-authored using generative AI tooling? - No Closes #42873 from db-scnakandala/multi-commutative-op. 
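
The underlying Scala behavior is worth spelling out: for a case class, `productIterator` only covers the first parameter list, so a field declared in a second (curried) parameter list has to be surfaced through `otherCopyArgs` for TreeNode's field/value pairing to line up. A small self-contained illustration of that property, using a hypothetical `Op` class rather than Spark's actual expression types:

```scala
// Shaped like MultiCommutativeOp: operands in the first parameter list,
// an extra field tucked into a second (curried) parameter list.
case class Op(operands: Seq[Int])(val originalRoot: String) {
  // TreeNode.jsonFields pairs declared constructor fields with
  // productIterator.toSeq ++ otherCopyArgs, so the curried field must be
  // returned here or the two sequences have different lengths.
  def otherCopyArgs: Seq[AnyRef] = originalRoot :: Nil
}

object OtherCopyArgsSketch {
  def main(args: Array[String]): Unit = {
    val op = Op(Seq(1, 2, 3))("root")
    // Only the first parameter list is visible to productIterator.
    println(op.productIterator.toSeq)                     // List(List(1, 2, 3))
    // The curried field has to be appended explicitly.
    println(op.productIterator.toSeq ++ op.otherCopyArgs) // List(List(1, 2, 3), root)
  }
}
```
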
Authored-by: Supun Nakandala Signed-off-by: Wenchen Fan (cherry picked from commit d999f622dc68b4fb2734e2ac7cbe203b062c257f) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/expressions/Expression.scala | 2 ++ .../catalyst/expressions/CanonicalizeSuite.scala | 13 +++++++++++++ 2 files changed, 15 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index c2330cdb59dbc..bd7369e57b057 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -1410,4 +1410,6 @@ case class MultiCommutativeOp( override protected def withNewChildrenInternal(newChildren: IndexedSeq[Expression]): Expression = this.copy(operands = newChildren)(originalRoot) + + override protected final def otherCopyArgs: Seq[AnyRef] = originalRoot :: Nil } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala index 0e22b0d2876d7..89175ea1970cc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CanonicalizeSuite.scala @@ -338,4 +338,17 @@ class CanonicalizeSuite extends SparkFunSuite { SQLConf.get.setConfString(MULTI_COMMUTATIVE_OP_OPT_THRESHOLD.key, default.toString) } + + test("toJSON works properly with MultiCommutativeOp") { + val default = SQLConf.get.getConf(MULTI_COMMUTATIVE_OP_OPT_THRESHOLD) + SQLConf.get.setConfString(MULTI_COMMUTATIVE_OP_OPT_THRESHOLD.key, "1") + + val d = Decimal(1.2) + val literal1 = Literal.create(d, DecimalType(2, 1)) + val literal2 = Literal.create(d, DecimalType(2, 1)) + val literal3 = Literal.create(d, DecimalType(3, 2)) + val op = Add(literal1, Add(literal2, literal3)) + assert(op.canonicalized.toJSON.nonEmpty) + SQLConf.get.setConfString(MULTI_COMMUTATIVE_OP_OPT_THRESHOLD.key, default.toString) + } } From af8c0b999be746b661efe2439ac015a0c7d12c00 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Tue, 12 Sep 2023 16:48:26 +0200 Subject: [PATCH 009/521] [SPARK-44872][CONNECT] Server testing infra and ReattachableExecuteSuite ### What changes were proposed in this pull request? Add `SparkConnectServerTest` with infra to test real server with real client in the same process, but communicating over RPC. Add `ReattachableExecuteSuite` with some tests for reattachable execute. Two bugs were found by the tests: * Fix bug in `SparkConnectExecutionManager.createExecuteHolder` when attempting to resubmit an operation that was deemed abandoned. This bug is benign in reattachable execute, because reattachable execute would first send a ReattachExecute, which would be handled correctly in SparkConnectReattachExecuteHandler. For non-reattachable execute (disabled or old client), this is also a very unlikely scenario, because the retrying mechanism should be able to resubmit before the query is declared abandoned, and hence get an INVALID_HANDLE.OPERATION_ALREADY_EXISTS. This bug can manifest only if a non-reattachable execution is retried with so much delay that the operation was declared abandoned. * In `ExecuteGrpcResponseSender` there was an assertion that assumed that if `sendResponse` did not send, it was because deadline was reached. 
But it can also be because of interrupt. This would have resulted in interrupt returning an assertion error instead of CURSOR_DISCONNECTED in testing. Outside of testing assertions are not enabled, so this was not a problem outside of testing. ### Why are the changes needed? Testing of reattachable execute. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Tests added. Closes #42560 from juliuszsompolski/sc-reattachable-tests. Authored-by: Juliusz Sompolski Signed-off-by: Herman van Hovell (cherry picked from commit 4b96add471d292ed5c63ccc625489ff78cfb9b25) Signed-off-by: Herman van Hovell --- .../connect/client/CloseableIterator.scala | 22 +- .../CustomSparkConnectBlockingStub.scala | 2 +- ...cutePlanResponseReattachableIterator.scala | 18 +- .../client/GrpcExceptionConverter.scala | 5 +- .../sql/connect/client/GrpcRetryHandler.scala | 4 +- .../execution/ExecuteGrpcResponseSender.scala | 17 +- .../execution/ExecuteResponseObserver.scala | 8 +- .../sql/connect/service/ExecuteHolder.scala | 10 + .../SparkConnectExecutionManager.scala | 40 +- .../sql/connect/SparkConnectServerTest.scala | 261 +++++++++++++ .../execution/ReattachableExecuteSuite.scala | 352 ++++++++++++++++++ .../org/apache/spark/SparkFunSuite.scala | 24 ++ 12 files changed, 735 insertions(+), 28 deletions(-) create mode 100644 connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala create mode 100644 connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala index 891e50ed6e7bd..d3fc9963edc7a 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala @@ -27,6 +27,20 @@ private[sql] trait CloseableIterator[E] extends Iterator[E] with AutoCloseable { } } +private[sql] abstract class WrappedCloseableIterator[E] extends CloseableIterator[E] { + + def innerIterator: Iterator[E] + + override def next(): E = innerIterator.next() + + override def hasNext(): Boolean = innerIterator.hasNext + + override def close(): Unit = innerIterator match { + case it: CloseableIterator[E] => it.close() + case _ => // nothing + } +} + private[sql] object CloseableIterator { /** @@ -35,12 +49,8 @@ private[sql] object CloseableIterator { def apply[T](iterator: Iterator[T]): CloseableIterator[T] = iterator match { case closeable: CloseableIterator[T] => closeable case _ => - new CloseableIterator[T] { - override def next(): T = iterator.next() - - override def hasNext(): Boolean = iterator.hasNext - - override def close() = { /* empty */ } + new WrappedCloseableIterator[T] { + override def innerIterator = iterator } } } diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CustomSparkConnectBlockingStub.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CustomSparkConnectBlockingStub.scala index 73ff01e223f29..80edcfa8be16a 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CustomSparkConnectBlockingStub.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CustomSparkConnectBlockingStub.scala @@ -22,7 +22,7 @@ import 
io.grpc.ManagedChannel import org.apache.spark.connect.proto._ -private[client] class CustomSparkConnectBlockingStub( +private[connect] class CustomSparkConnectBlockingStub( channel: ManagedChannel, retryPolicy: GrpcRetryHandler.RetryPolicy) { diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ExecutePlanResponseReattachableIterator.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ExecutePlanResponseReattachableIterator.scala index 9bf7de33da8a7..57a629264be10 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ExecutePlanResponseReattachableIterator.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ExecutePlanResponseReattachableIterator.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.connect.client import java.util.UUID +import scala.collection.JavaConverters._ import scala.util.control.NonFatal import io.grpc.{ManagedChannel, StatusRuntimeException} @@ -50,7 +51,7 @@ class ExecutePlanResponseReattachableIterator( request: proto.ExecutePlanRequest, channel: ManagedChannel, retryPolicy: GrpcRetryHandler.RetryPolicy) - extends CloseableIterator[proto.ExecutePlanResponse] + extends WrappedCloseableIterator[proto.ExecutePlanResponse] with Logging { val operationId = if (request.hasOperationId) { @@ -86,14 +87,25 @@ class ExecutePlanResponseReattachableIterator( // True after ResultComplete message was seen in the stream. // Server will always send this message at the end of the stream, if the underlying iterator // finishes without producing one, another iterator needs to be reattached. - private var resultComplete: Boolean = false + // Visible for testing. + private[connect] var resultComplete: Boolean = false // Initial iterator comes from ExecutePlan request. // Note: This is not retried, because no error would ever be thrown here, and GRPC will only // throw error on first iter.hasNext() or iter.next() - private var iter: Option[java.util.Iterator[proto.ExecutePlanResponse]] = + // Visible for testing. + private[connect] var iter: Option[java.util.Iterator[proto.ExecutePlanResponse]] = Some(rawBlockingStub.executePlan(initialRequest)) + override def innerIterator: Iterator[proto.ExecutePlanResponse] = iter match { + case Some(it) => it.asScala + case None => + // The iterator is only unset for short moments while retry exception is thrown. + // It should only happen in the middle of internal processing. Since this iterator is not + // thread safe, no-one should be accessing it at this moment. 
+ throw new IllegalStateException("innerIterator unset") + } + override def next(): proto.ExecutePlanResponse = synchronized { // hasNext will trigger reattach in case the stream completed without resultComplete if (!hasNext()) { diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala index c430485bd4184..fe9f6dc2b4a9a 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcExceptionConverter.scala @@ -43,7 +43,10 @@ private[client] object GrpcExceptionConverter extends JsonUtils { } def convertIterator[T](iter: CloseableIterator[T]): CloseableIterator[T] = { - new CloseableIterator[T] { + new WrappedCloseableIterator[T] { + + override def innerIterator: Iterator[T] = iter + override def hasNext: Boolean = { convert { iter.hasNext diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcRetryHandler.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcRetryHandler.scala index 8791530607c3a..3c0b750fd46e7 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcRetryHandler.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/GrpcRetryHandler.scala @@ -48,11 +48,13 @@ private[sql] class GrpcRetryHandler( * The type of the response. */ class RetryIterator[T, U](request: T, call: T => CloseableIterator[U]) - extends CloseableIterator[U] { + extends WrappedCloseableIterator[U] { private var opened = false // we only retry if it fails on first call when using the iterator private var iter = call(request) + override def innerIterator: Iterator[U] = iter + private def retryIter[V](f: Iterator[U] => V) = { if (!opened) { opened = true diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala index 6b8fcde1156ed..c3c33a85d6517 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteGrpcResponseSender.scala @@ -47,6 +47,9 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( private var interrupted = false + // Time at which this sender should finish if the response stream is not finished by then. + private var deadlineTimeMillis = Long.MaxValue + // Signal to wake up when grpcCallObserver.isReady() private val grpcCallObserverReadySignal = new Object @@ -65,6 +68,12 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( executionObserver.notifyAll() } + // For testing + private[connect] def setDeadline(deadlineMs: Long) = executionObserver.synchronized { + deadlineTimeMillis = deadlineMs + executionObserver.notifyAll() + } + def run(lastConsumedStreamIndex: Long): Unit = { if (executeHolder.reattachable) { // In reattachable execution we use setOnReadyHandler and grpcCallObserver.isReady to control @@ -150,7 +159,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( var finished = false // Time at which this sender should finish if the response stream is not finished by then. 
- val deadlineTimeMillis = if (!executeHolder.reattachable) { + deadlineTimeMillis = if (!executeHolder.reattachable) { Long.MaxValue } else { val confSize = @@ -232,8 +241,8 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( assert(finished == false) } else { // If it wasn't sent, time deadline must have been reached before stream became available, - // will exit in the enxt loop iterattion. - assert(deadlineLimitReached) + // or it was intterupted. Will exit in the next loop iterattion. + assert(deadlineLimitReached || interrupted) } } else if (streamFinished) { // Stream is finished and all responses have been sent @@ -301,7 +310,7 @@ private[connect] class ExecuteGrpcResponseSender[T <: Message]( val sleepStart = System.nanoTime() var sleepEnd = 0L // Conditions for exiting the inner loop - // 1. was detached + // 1. was interrupted // 2. grpcCallObserver is ready to send more data // 3. time deadline is reached while (!interrupted && diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala index d9db07fd228ed..df0fb3ac3a592 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/ExecuteResponseObserver.scala @@ -73,11 +73,16 @@ private[connect] class ExecuteResponseObserver[T <: Message](val executeHolder: /** The index of the last response produced by execution. */ private var lastProducedIndex: Long = 0 // first response will have index 1 + // For testing + private[connect] var releasedUntilIndex: Long = 0 + /** * Highest response index that was consumed. Keeps track of it to decide which responses needs * to be cached, and to assert that all responses are consumed. + * + * Visible for testing. */ - private var highestConsumedIndex: Long = 0 + private[connect] var highestConsumedIndex: Long = 0 /** * Consumer that waits for available responses. There can be only one at a time, @see @@ -284,6 +289,7 @@ private[connect] class ExecuteResponseObserver[T <: Message](val executeHolder: responses.remove(i) i -= 1 } + releasedUntilIndex = index } /** diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index bce0713339228..974c13b08e318 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -183,6 +183,16 @@ private[connect] class ExecuteHolder( } } + // For testing. 
+ private[connect] def setGrpcResponseSendersDeadline(deadlineMs: Long) = synchronized { + grpcResponseSenders.foreach(_.setDeadline(deadlineMs)) + } + + // For testing + private[connect] def interruptGrpcResponseSenders() = synchronized { + grpcResponseSenders.foreach(_.interrupt()) + } + /** * For a short period in ExecutePlan after creation and until runGrpcResponseSender is called, * there is no attached response sender, but yet we start with lastAttachedRpcTime = None, so we diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectExecutionManager.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectExecutionManager.scala index ce1f6c93f6cfe..21f59bdd68ea5 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectExecutionManager.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectExecutionManager.scala @@ -71,15 +71,14 @@ private[connect] class SparkConnectExecutionManager() extends Logging { // The latter is to prevent double execution when a client retries execution, thinking it // never reached the server, but in fact it did, and already got removed as abandoned. if (executions.get(executeHolder.key).isDefined) { - if (getAbandonedTombstone(executeHolder.key).isDefined) { - throw new SparkSQLException( - errorClass = "INVALID_HANDLE.OPERATION_ABANDONED", - messageParameters = Map("handle" -> executeHolder.operationId)) - } else { - throw new SparkSQLException( - errorClass = "INVALID_HANDLE.OPERATION_ALREADY_EXISTS", - messageParameters = Map("handle" -> executeHolder.operationId)) - } + throw new SparkSQLException( + errorClass = "INVALID_HANDLE.OPERATION_ALREADY_EXISTS", + messageParameters = Map("handle" -> executeHolder.operationId)) + } + if (getAbandonedTombstone(executeHolder.key).isDefined) { + throw new SparkSQLException( + errorClass = "INVALID_HANDLE.OPERATION_ABANDONED", + messageParameters = Map("handle" -> executeHolder.operationId)) } sessionHolder.addExecuteHolder(executeHolder) executions.put(executeHolder.key, executeHolder) @@ -141,12 +140,17 @@ private[connect] class SparkConnectExecutionManager() extends Logging { abandonedTombstones.asMap.asScala.values.toBuffer.toSeq } - private[service] def shutdown(): Unit = executionsLock.synchronized { + private[connect] def shutdown(): Unit = executionsLock.synchronized { scheduledExecutor.foreach { executor => executor.shutdown() executor.awaitTermination(1, TimeUnit.MINUTES) } scheduledExecutor = None + executions.clear() + abandonedTombstones.invalidateAll() + if (!lastExecutionTime.isDefined) { + lastExecutionTime = Some(System.currentTimeMillis()) + } } /** @@ -188,7 +192,7 @@ private[connect] class SparkConnectExecutionManager() extends Logging { executions.values.foreach { executeHolder => executeHolder.lastAttachedRpcTime match { case Some(detached) => - if (detached + timeout < nowMs) { + if (detached + timeout <= nowMs) { toRemove += executeHolder } case _ => // execution is active @@ -206,4 +210,18 @@ private[connect] class SparkConnectExecutionManager() extends Logging { } logInfo("Finished periodic run of SparkConnectExecutionManager maintenance.") } + + // For testing. + private[connect] def setAllRPCsDeadline(deadlineMs: Long) = executionsLock.synchronized { + executions.values.foreach(_.setGrpcResponseSendersDeadline(deadlineMs)) + } + + // For testing. 
+ private[connect] def interruptAllRPCs() = executionsLock.synchronized { + executions.values.foreach(_.interruptGrpcResponseSenders()) + } + + private[connect] def listExecuteHolders = executionsLock.synchronized { + executions.values.toBuffer.toSeq + } } diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala new file mode 100644 index 0000000000000..488858d33ea12 --- /dev/null +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala @@ -0,0 +1,261 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connect + +import java.util.UUID + +import org.scalatest.concurrent.{Eventually, TimeLimits} +import org.scalatest.time.Span +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.connect.proto +import org.apache.spark.sql.connect.client.{CloseableIterator, CustomSparkConnectBlockingStub, ExecutePlanResponseReattachableIterator, GrpcRetryHandler, SparkConnectClient, WrappedCloseableIterator} +import org.apache.spark.sql.connect.common.config.ConnectCommon +import org.apache.spark.sql.connect.config.Connect +import org.apache.spark.sql.connect.dsl.MockRemoteSession +import org.apache.spark.sql.connect.dsl.plans._ +import org.apache.spark.sql.connect.service.{ExecuteHolder, SparkConnectService} +import org.apache.spark.sql.test.SharedSparkSession + +/** + * Base class and utilities for a test suite that starts and tests the real SparkConnectService + * with a real SparkConnectClient, communicating over RPC, but both in-process. + */ +class SparkConnectServerTest extends SharedSparkSession { + + // Server port + val serverPort: Int = + ConnectCommon.CONNECT_GRPC_BINDING_PORT + util.Random.nextInt(1000) + + val eventuallyTimeout = 30.seconds + + override def beforeAll(): Unit = { + super.beforeAll() + // Other suites using mocks leave a mess in the global executionManager, + // shut it down so that it's cleared before starting server. + SparkConnectService.executionManager.shutdown() + // Start the real service. + withSparkEnvConfs((Connect.CONNECT_GRPC_BINDING_PORT.key, serverPort.toString)) { + SparkConnectService.start(spark.sparkContext) + } + // register udf directly on the server, we're not testing client UDFs here... 
+ val serverSession = + SparkConnectService.getOrCreateIsolatedSession(defaultUserId, defaultSessionId).session + serverSession.udf.register("sleep", ((ms: Int) => { Thread.sleep(ms); ms })) + } + + override def afterAll(): Unit = { + SparkConnectService.stop() + super.afterAll() + } + + override def beforeEach(): Unit = { + super.beforeEach() + clearAllExecutions() + } + + override def afterEach(): Unit = { + clearAllExecutions() + super.afterEach() + } + + protected def clearAllExecutions(): Unit = { + SparkConnectService.executionManager.listExecuteHolders.foreach(_.close()) + SparkConnectService.executionManager.periodicMaintenance(0) + assertNoActiveExecutions() + } + + protected val defaultSessionId = UUID.randomUUID.toString() + protected val defaultUserId = UUID.randomUUID.toString() + + // We don't have the real SparkSession/Dataset api available, + // so use mock for generating simple query plans. + protected val dsl = new MockRemoteSession() + + protected val userContext = proto.UserContext + .newBuilder() + .setUserId(defaultUserId) + .build() + + protected def buildExecutePlanRequest( + plan: proto.Plan, + sessionId: String = defaultSessionId, + operationId: String = UUID.randomUUID.toString) = { + proto.ExecutePlanRequest + .newBuilder() + .setUserContext(userContext) + .setSessionId(sessionId) + .setOperationId(operationId) + .setPlan(plan) + .addRequestOptions( + proto.ExecutePlanRequest.RequestOption + .newBuilder() + .setReattachOptions(proto.ReattachOptions.newBuilder().setReattachable(true).build()) + .build()) + .build() + } + + protected def buildReattachExecuteRequest(operationId: String, responseId: Option[String]) = { + val req = proto.ReattachExecuteRequest + .newBuilder() + .setUserContext(userContext) + .setSessionId(defaultSessionId) + .setOperationId(operationId) + + if (responseId.isDefined) { + req.setLastResponseId(responseId.get) + } + + req.build() + } + + protected def buildPlan(query: String) = { + proto.Plan.newBuilder().setRoot(dsl.sql(query)).build() + } + + protected def getReattachableIterator( + stubIterator: CloseableIterator[proto.ExecutePlanResponse]) = { + // This depends on the wrapping in CustomSparkConnectBlockingStub.executePlanReattachable: + // GrpcExceptionConverter.convertIterator + stubIterator + .asInstanceOf[WrappedCloseableIterator[proto.ExecutePlanResponse]] + // ExecutePlanResponseReattachableIterator + .innerIterator + .asInstanceOf[ExecutePlanResponseReattachableIterator] + } + + protected def assertNoActiveRpcs(): Unit = { + SparkConnectService.executionManager.listActiveExecutions match { + case Left(_) => // nothing running, good + case Right(executions) => + // all rpc detached. 
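        // (Editorial note, not part of this patch: as the ExecuteHolder comment and the
        //  periodicMaintenance match earlier in this diff indicate, lastAttachedRpcTime is
        //  Some(detachedTime) only after the last response sender detached, and it is None
        //  while an RPC is attached, so forall(_.isDefined) means "no RPC currently attached".)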
+ assert( + executions.forall(_.lastAttachedRpcTime.isDefined), + s"Expected no RPCs, but got $executions") + } + } + + protected def assertEventuallyNoActiveRpcs(): Unit = { + Eventually.eventually(timeout(eventuallyTimeout)) { + assertNoActiveRpcs() + } + } + + protected def assertNoActiveExecutions(): Unit = { + SparkConnectService.executionManager.listActiveExecutions match { + case Left(_) => // cleaned up + case Right(executions) => fail(s"Expected empty, but got $executions") + } + } + + protected def assertEventuallyNoActiveExecutions(): Unit = { + Eventually.eventually(timeout(eventuallyTimeout)) { + assertNoActiveExecutions() + } + } + + protected def assertExecutionReleased(operationId: String): Unit = { + SparkConnectService.executionManager.listActiveExecutions match { + case Left(_) => // cleaned up + case Right(executions) => assert(!executions.exists(_.operationId == operationId)) + } + } + + protected def assertEventuallyExecutionReleased(operationId: String): Unit = { + Eventually.eventually(timeout(eventuallyTimeout)) { + assertExecutionReleased(operationId) + } + } + + // Get ExecutionHolder, assuming that only one execution is active + protected def getExecutionHolder: ExecuteHolder = { + val executions = SparkConnectService.executionManager.listExecuteHolders + assert(executions.length == 1) + executions.head + } + + protected def withClient(f: SparkConnectClient => Unit): Unit = { + val client = SparkConnectClient + .builder() + .port(serverPort) + .sessionId(defaultSessionId) + .userId(defaultUserId) + .enableReattachableExecute() + .build() + try f(client) + finally { + client.shutdown() + } + } + + protected def withRawBlockingStub( + f: proto.SparkConnectServiceGrpc.SparkConnectServiceBlockingStub => Unit): Unit = { + val conf = SparkConnectClient.Configuration(port = serverPort) + val channel = conf.createChannel() + val bstub = proto.SparkConnectServiceGrpc.newBlockingStub(channel) + try f(bstub) + finally { + channel.shutdownNow() + } + } + + protected def withCustomBlockingStub( + retryPolicy: GrpcRetryHandler.RetryPolicy = GrpcRetryHandler.RetryPolicy())( + f: CustomSparkConnectBlockingStub => Unit): Unit = { + val conf = SparkConnectClient.Configuration(port = serverPort) + val channel = conf.createChannel() + val bstub = new CustomSparkConnectBlockingStub(channel, retryPolicy) + try f(bstub) + finally { + channel.shutdownNow() + } + } + + protected def runQuery(plan: proto.Plan, queryTimeout: Span, iterSleep: Long): Unit = { + withClient { client => + TimeLimits.failAfter(queryTimeout) { + val iter = client.execute(plan) + var operationId: Option[String] = None + var r: proto.ExecutePlanResponse = null + val reattachableIter = getReattachableIterator(iter) + while (iter.hasNext) { + r = iter.next() + operationId match { + case None => operationId = Some(r.getOperationId) + case Some(id) => assert(r.getOperationId == id) + } + if (iterSleep > 0) { + Thread.sleep(iterSleep) + } + } + // Check that last response had ResultComplete indicator + assert(r != null) + assert(r.hasResultComplete) + // ... that client sent ReleaseExecute based on it + assert(reattachableIter.resultComplete) + // ... and that the server released the execution. 
+ assert(operationId.isDefined) + assertEventuallyExecutionReleased(operationId.get) + } + } + } + + protected def runQuery(query: String, queryTimeout: Span, iterSleep: Long = 0): Unit = { + val plan = buildPlan(query) + runQuery(plan, queryTimeout, iterSleep) + } +} diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala new file mode 100644 index 0000000000000..169b15582b698 --- /dev/null +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala @@ -0,0 +1,352 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.connect.execution + +import java.util.UUID + +import io.grpc.StatusRuntimeException +import org.scalatest.concurrent.Eventually +import org.scalatest.time.SpanSugar._ + +import org.apache.spark.SparkException +import org.apache.spark.sql.connect.SparkConnectServerTest +import org.apache.spark.sql.connect.config.Connect +import org.apache.spark.sql.connect.service.SparkConnectService + +class ReattachableExecuteSuite extends SparkConnectServerTest { + + // Tests assume that this query will result in at least a couple ExecutePlanResponses on the + // stream. If this is no longer the case because of changes in how much is returned in a single + // ExecutePlanResponse, it may need to be adjusted. 
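  // Illustration only, an editor's sketch that is not part of this patch: the assumption in the
  // comment above could be verified with the helpers already defined in SparkConnectServerTest
  // (buildPlan, buildExecutePlanRequest, withRawBlockingStub); the ">= 2" threshold is just a
  // reading of "at least a couple" and may need tuning.
  test("sanity check: medium query produces several ExecutePlanResponses") {
    withRawBlockingStub { stub =>
      val iter =
        stub.executePlan(buildExecutePlanRequest(buildPlan("select * from range(1000000)")))
      var responses = 0
      while (iter.hasNext) {
        iter.next()
        responses += 1
      }
      assert(responses >= 2, s"expected several ExecutePlanResponses, got $responses")
    }
  }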
+ val MEDIUM_RESULTS_QUERY = "select * from range(1000000)" + + test("reattach after initial RPC ends") { + withClient { client => + val iter = client.execute(buildPlan(MEDIUM_RESULTS_QUERY)) + val reattachableIter = getReattachableIterator(iter) + val initialInnerIter = reattachableIter.innerIterator + + // open the iterator + iter.next() + // expire all RPCs on server + SparkConnectService.executionManager.setAllRPCsDeadline(System.currentTimeMillis() - 1) + assertEventuallyNoActiveRpcs() + // iterator should reattach + // (but not necessarily at first next, as there might have been messages buffered client side) + while (iter.hasNext && (reattachableIter.innerIterator eq initialInnerIter)) { + iter.next() + } + assert( + reattachableIter.innerIterator ne initialInnerIter + ) // reattach changed the inner iter + } + } + + test("raw interrupted RPC results in INVALID_CURSOR.DISCONNECTED error") { + withRawBlockingStub { stub => + val iter = stub.executePlan(buildExecutePlanRequest(buildPlan(MEDIUM_RESULTS_QUERY))) + iter.next() // open the iterator + // interrupt all RPCs on server + SparkConnectService.executionManager.interruptAllRPCs() + assertEventuallyNoActiveRpcs() + val e = intercept[StatusRuntimeException] { + while (iter.hasNext) iter.next() + } + assert(e.getMessage.contains("INVALID_CURSOR.DISCONNECTED")) + } + } + + test("raw new RPC interrupts previous RPC with INVALID_CURSOR.DISCONNECTED error") { + // Raw stub does not have retries, auto reattach etc. + withRawBlockingStub { stub => + val operationId = UUID.randomUUID().toString + val iter = stub.executePlan( + buildExecutePlanRequest(buildPlan(MEDIUM_RESULTS_QUERY), operationId = operationId)) + iter.next() // open the iterator + + // send reattach + val iter2 = stub.reattachExecute(buildReattachExecuteRequest(operationId, None)) + iter2.next() // open the iterator + + // should result in INVALID_CURSOR.DISCONNECTED error on the original iterator + val e = intercept[StatusRuntimeException] { + while (iter.hasNext) iter.next() + } + assert(e.getMessage.contains("INVALID_CURSOR.DISCONNECTED")) + + // send another reattach + val iter3 = stub.reattachExecute(buildReattachExecuteRequest(operationId, None)) + assert(iter3.hasNext) + iter3.next() // open the iterator + + // should result in INVALID_CURSOR.DISCONNECTED error on the previous reattach iterator + val e2 = intercept[StatusRuntimeException] { + while (iter2.hasNext) iter2.next() + } + assert(e2.getMessage.contains("INVALID_CURSOR.DISCONNECTED")) + } + } + + test("client INVALID_CURSOR.DISCONNECTED error is retried when rpc sender gets interrupted") { + withClient { client => + val iter = client.execute(buildPlan(MEDIUM_RESULTS_QUERY)) + val reattachableIter = getReattachableIterator(iter) + val initialInnerIter = reattachableIter.innerIterator + val operationId = getReattachableIterator(iter).operationId + + // open the iterator + iter.next() + + // interrupt all RPCs on server + SparkConnectService.executionManager.interruptAllRPCs() + assertEventuallyNoActiveRpcs() + + // Nevertheless, the original iterator will handle the INVALID_CURSOR.DISCONNECTED error + iter.next() + // iterator changed because it had to reconnect + assert(reattachableIter.innerIterator ne initialInnerIter) + } + } + + test("client INVALID_CURSOR.DISCONNECTED error is retried when other RPC preempts this one") { + withClient { client => + val iter = client.execute(buildPlan(MEDIUM_RESULTS_QUERY)) + val reattachableIter = getReattachableIterator(iter) + val initialInnerIter = 
reattachableIter.innerIterator + val operationId = getReattachableIterator(iter).operationId + + // open the iterator + val response = iter.next() + + // Send another Reattach request, it should preempt this request with an + // INVALID_CURSOR.DISCONNECTED error. + withRawBlockingStub { stub => + val reattachIter = stub.reattachExecute( + buildReattachExecuteRequest(operationId, Some(response.getResponseId))) + assert(reattachIter.hasNext) + reattachIter.next() + + // Nevertheless, the original iterator will handle the INVALID_CURSOR.DISCONNECTED error + iter.next() + // iterator changed because it had to reconnect + assert(reattachableIter.innerIterator ne initialInnerIter) + } + } + } + + test("abandoned query gets INVALID_HANDLE.OPERATION_ABANDONED error") { + withClient { client => + val plan = buildPlan("select * from range(100000)") + val iter = client.execute(buildPlan(MEDIUM_RESULTS_QUERY)) + val operationId = getReattachableIterator(iter).operationId + // open the iterator + iter.next() + // disconnect and remove on server + SparkConnectService.executionManager.setAllRPCsDeadline(System.currentTimeMillis() - 1) + assertEventuallyNoActiveRpcs() + SparkConnectService.executionManager.periodicMaintenance(0) + assertNoActiveExecutions() + // check that it throws abandoned error + val e = intercept[SparkException] { + while (iter.hasNext) iter.next() + } + assert(e.getMessage.contains("INVALID_HANDLE.OPERATION_ABANDONED")) + // check that afterwards, new operation can't be created with the same operationId. + withCustomBlockingStub() { stub => + val executePlanReq = buildExecutePlanRequest(plan, operationId = operationId) + + val iterNonReattachable = stub.executePlan(executePlanReq) + val eNonReattachable = intercept[SparkException] { + iterNonReattachable.hasNext + } + assert(eNonReattachable.getMessage.contains("INVALID_HANDLE.OPERATION_ABANDONED")) + + val iterReattachable = stub.executePlanReattachable(executePlanReq) + val eReattachable = intercept[SparkException] { + iterReattachable.hasNext + } + assert(eReattachable.getMessage.contains("INVALID_HANDLE.OPERATION_ABANDONED")) + } + } + } + + test("client releases responses directly after consuming them") { + withClient { client => + val iter = client.execute(buildPlan(MEDIUM_RESULTS_QUERY)) + val reattachableIter = getReattachableIterator(iter) + val initialInnerIter = reattachableIter.innerIterator + val operationId = getReattachableIterator(iter).operationId + + assert(iter.hasNext) // open iterator + val execution = getExecutionHolder + assert(execution.responseObserver.releasedUntilIndex == 0) + + // get two responses, check on the server that ReleaseExecute releases them afterwards + val response1 = iter.next() + Eventually.eventually(timeout(eventuallyTimeout)) { + assert(execution.responseObserver.releasedUntilIndex == 1) + } + + val response2 = iter.next() + Eventually.eventually(timeout(eventuallyTimeout)) { + assert(execution.responseObserver.releasedUntilIndex == 2) + } + + withRawBlockingStub { stub => + // Reattach after response1 should fail with INVALID_CURSOR.POSITION_NOT_AVAILABLE + val reattach1 = stub.reattachExecute( + buildReattachExecuteRequest(operationId, Some(response1.getResponseId))) + val e = intercept[StatusRuntimeException] { + reattach1.hasNext() + } + assert(e.getMessage.contains("INVALID_CURSOR.POSITION_NOT_AVAILABLE")) + + // Reattach after response2 should work + val reattach2 = stub.reattachExecute( + buildReattachExecuteRequest(operationId, Some(response2.getResponseId))) + val response3 = 
reattach2.next() + val response4 = reattach2.next() + val response5 = reattach2.next() + + // The original client iterator will handle the INVALID_CURSOR.DISCONNECTED error, + // and reconnect back. Since the raw iterator was not releasing responses, client iterator + // should be able to continue where it left off (server shouldn't have released yet) + assert(execution.responseObserver.releasedUntilIndex == 2) + assert(iter.hasNext) + + val r3 = iter.next() + assert(r3.getResponseId == response3.getResponseId) + val r4 = iter.next() + assert(r4.getResponseId == response4.getResponseId) + val r5 = iter.next() + assert(r5.getResponseId == response5.getResponseId) + // inner iterator changed because it had to reconnect + assert(reattachableIter.innerIterator ne initialInnerIter) + } + } + } + + test("server releases responses automatically when client moves ahead") { + withRawBlockingStub { stub => + val operationId = UUID.randomUUID().toString + val iter = stub.executePlan( + buildExecutePlanRequest(buildPlan(MEDIUM_RESULTS_QUERY), operationId = operationId)) + var lastSeenResponse: String = null + + iter.hasNext // open iterator + val execution = getExecutionHolder + + // after consuming enough from the iterator, server should automatically start releasing + var lastSeenIndex = 0 + while (iter.hasNext && execution.responseObserver.releasedUntilIndex == 0) { + val r = iter.next() + lastSeenResponse = r.getResponseId() + lastSeenIndex += 1 + } + assert(iter.hasNext) + assert(execution.responseObserver.releasedUntilIndex > 0) + + // Reattach from the beginning is not available. + val reattach = stub.reattachExecute(buildReattachExecuteRequest(operationId, None)) + val e = intercept[StatusRuntimeException] { + reattach.hasNext() + } + assert(e.getMessage.contains("INVALID_CURSOR.POSITION_NOT_AVAILABLE")) + + // Original iterator got disconnected by the reattach and gets INVALID_CURSOR.DISCONNECTED + val e2 = intercept[StatusRuntimeException] { + while (iter.hasNext) iter.next() + } + assert(e2.getMessage.contains("INVALID_CURSOR.DISCONNECTED")) + + Eventually.eventually(timeout(eventuallyTimeout)) { + // Even though we didn't consume more from the iterator, the server thinks that + // it sent more, because GRPC stream onNext() can push into internal GRPC buffer without + // client picking it up. + assert(execution.responseObserver.highestConsumedIndex > lastSeenIndex) + } + // but CONNECT_EXECUTE_REATTACHABLE_OBSERVER_RETRY_BUFFER_SIZE is big enough that the last + // response we've seen is still in range + assert(execution.responseObserver.releasedUntilIndex < lastSeenIndex) + + // and a new reattach can continue after what there. + val reattach2 = + stub.reattachExecute(buildReattachExecuteRequest(operationId, Some(lastSeenResponse))) + assert(reattach2.hasNext) + while (reattach2.hasNext) reattach2.next() + } + } + + // A few integration tests with large results. + // They should run significantly faster than the LARGE_QUERY_TIMEOUT + // - big query (4 seconds, 871 milliseconds) + // - big query and slow client (7 seconds, 288 milliseconds) + // - big query with frequent reattach (1 second, 527 milliseconds) + // - big query with frequent reattach and slow client (7 seconds, 365 milliseconds) + // - long sleeping query (10 seconds, 805 milliseconds) + + // intentionally smaller than CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_DURATION, + // so that reattach deadline doesn't "unstuck" if something got stuck. 
+ val LARGE_QUERY_TIMEOUT = 100.seconds + + val LARGE_RESULTS_QUERY = s"select id, " + + (1 to 20).map(i => s"cast(id as string) c$i").mkString(", ") + + s" from range(1000000)" + + test("big query") { + // regular query with large results + runQuery(LARGE_RESULTS_QUERY, LARGE_QUERY_TIMEOUT) + // Check that execution is released on the server. + assertEventuallyNoActiveExecutions() + } + + test("big query and slow client") { + // regular query with large results, but client is slow so sender will need to control flow + runQuery(LARGE_RESULTS_QUERY, LARGE_QUERY_TIMEOUT, iterSleep = 50) + // Check that execution is released on the server. + assertEventuallyNoActiveExecutions() + } + + test("big query with frequent reattach") { + // will reattach every 100kB + withSparkEnvConfs((Connect.CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_SIZE.key, "100k")) { + runQuery(LARGE_RESULTS_QUERY, LARGE_QUERY_TIMEOUT) + // Check that execution is released on the server. + assertEventuallyNoActiveExecutions() + } + } + + test("big query with frequent reattach and slow client") { + // will reattach every 100kB, and in addition the client is slow, + // so sender will need to control flow + withSparkEnvConfs((Connect.CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_SIZE.key, "100k")) { + runQuery(LARGE_RESULTS_QUERY, LARGE_QUERY_TIMEOUT, iterSleep = 50) + // Check that execution is released on the server. + assertEventuallyNoActiveExecutions() + } + } + + test("long sleeping query") { + // query will be sleeping and not returning results, while having multiple reattach + withSparkEnvConfs( + (Connect.CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_DURATION.key, "1s")) { + runQuery("select sleep(10000) as s", 30.seconds) + // Check that execution is released on the server. + assertEventuallyNoActiveExecutions() + } + } +} diff --git a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala index f5819b9508777..1163088c82aa8 100644 --- a/core/src/test/scala/org/apache/spark/SparkFunSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkFunSuite.scala @@ -302,6 +302,30 @@ abstract class SparkFunSuite } } + /** + * Sets all configurations specified in `pairs` in SparkEnv SparkConf, calls `f`, and then + * restores all configurations. + */ + protected def withSparkEnvConfs(pairs: (String, String)*)(f: => Unit): Unit = { + val conf = SparkEnv.get.conf + val (keys, values) = pairs.unzip + val currentValues = keys.map { key => + if (conf.getOption(key).isDefined) { + Some(conf.get(key)) + } else { + None + } + } + pairs.foreach { kv => conf.set(kv._1, kv._2) } + try f + finally { + keys.zip(currentValues).foreach { + case (key, Some(value)) => conf.set(key, value) + case (key, None) => conf.remove(key) + } + } + } + /** * Checks an exception with an error class against expected results. * @param exception The exception to check From 151f88b53e67944d6ca5c635466f50958019c8b4 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 13 Sep 2023 21:20:19 +0900 Subject: [PATCH 010/521] [SPARK-45142][INFRA] Specify the range for Spark Connect dependencies in pyspark base image This PR proposes to pin the dependencies related to Spark Connect in its base image according to the range we support. See also https://github.com/apache/spark/blob/master/python/docs/source/getting_started/install.rst#dependencies To properly test the dependency versions we support. No, dev-only. In this PR, it will be tested. No. Closes #42898 from HyukjinKwon/SPARK-45142. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 61435b42fdc4071f35aba6af9248ff9ad8fc8514) Signed-off-by: Hyukjin Kwon --- dev/infra/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/infra/Dockerfile b/dev/infra/Dockerfile index af8e1a980f93c..d3bae836cc631 100644 --- a/dev/infra/Dockerfile +++ b/dev/infra/Dockerfile @@ -68,7 +68,7 @@ RUN pypy3 -m pip install numpy 'pandas<=2.0.3' scipy coverage matplotlib RUN python3.9 -m pip install numpy pyarrow 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage matplotlib openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*' # Add Python deps for Spark Connect. -RUN python3.9 -m pip install grpcio protobuf googleapis-common-protos grpcio-status +RUN python3.9 -m pip install 'grpcio>=1.48,<1.57' 'grpcio-status>=1.48,<1.57' 'protobuf==3.20.3' 'googleapis-common-protos==1.56.4' # Add torch as a testing dependency for TorchDistributor RUN python3.9 -m pip install torch torchvision torcheval From e72ae794e69d8182291655d023aee903a913571b Mon Sep 17 00:00:00 2001 From: chenyu-opensource <119398199+chenyu-opensource@users.noreply.github.com> Date: Wed, 13 Sep 2023 08:48:14 -0500 Subject: [PATCH 011/521] [SPARK-45146][DOCS] Update the default value of 'spark.submit.deployMode' **What changes were proposed in this pull request?** The PR updates the default value of 'spark.submit.deployMode' in configuration.html on the website **Why are the changes needed?** The default value of 'spark.submit.deployMode' is 'client', but the website is wrong. **Does this PR introduce any user-facing change?** No **How was this patch tested?** It doesn't need to. **Was this patch authored or co-authored using generative AI tooling?** No Closes #42902 from chenyu-opensource/branch-SPARK-45146. Authored-by: chenyu-opensource <119398199+chenyu-opensource@users.noreply.github.com> Signed-off-by: Sean Owen (cherry picked from commit 076cb7aabac2f0ff11ca77ca530b7b8db5310a5e) Signed-off-by: Sean Owen --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index dfded480c99d3..1139beb66462f 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -394,7 +394,7 @@ of the most common options to set are: spark.submit.deployMode - (none) + client The deploy mode of Spark driver program, either "client" or "cluster", Which means to launch driver program locally ("client") From 0e1a9b65d48389e2bbed11dabfa6c61cca5f41f0 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Thu, 14 Sep 2023 18:23:38 +0800 Subject: [PATCH 012/521] [MINOR][PYTHON][DOCS] Fix default value of parameter `barrier` in MapInXXX ### What changes were proposed in this pull request? Fix default value of parameter `barrier` ### Why are the changes needed? they default to `False` ### Does this PR introduce _any_ user-facing change? yes ### How was this patch tested? CI ### Was this patch authored or co-authored using generative AI tooling? NO Closes #42923 from zhengruifeng/45114_followup. 
Authored-by: Ruifeng Zheng Signed-off-by: Ruifeng Zheng (cherry picked from commit e1d2372b8916741fe199ee7b154e53af1eb1ba5a) Signed-off-by: Ruifeng Zheng --- python/pyspark/sql/pandas/map_ops.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/pandas/map_ops.py b/python/pyspark/sql/pandas/map_ops.py index bc26fdede2888..710fc8a9a370a 100644 --- a/python/pyspark/sql/pandas/map_ops.py +++ b/python/pyspark/sql/pandas/map_ops.py @@ -60,11 +60,10 @@ def mapInPandas( schema : :class:`pyspark.sql.types.DataType` or str the return type of the `func` in PySpark. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. - barrier : bool, optional, default True + barrier : bool, optional, default False Use barrier mode execution. - .. versionchanged: 3.5.0 - Added ``barrier`` argument. + .. versionadded: 3.5.0 Examples -------- @@ -139,11 +138,10 @@ def mapInArrow( schema : :class:`pyspark.sql.types.DataType` or str the return type of the `func` in PySpark. The value can be either a :class:`pyspark.sql.types.DataType` object or a DDL-formatted type string. - barrier : bool, optional, default True + barrier : bool, optional, default False Use barrier mode execution. - .. versionchanged: 3.5.0 - Added ``barrier`` argument. + .. versionadded: 3.5.0 Examples -------- From 9c0b803ba124a6e70762aec1e5559b0d66529f4d Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Fri, 15 Sep 2023 13:22:40 +0900 Subject: [PATCH 013/521] [SPARK-45171][SQL] Initialize non-deterministic expressions in `GenerateExec` ### What changes were proposed in this pull request? Before evaluating the generator function in `GenerateExec`, initialize non-deterministic expressions. ### Why are the changes needed? The following query fails: ``` select * from explode( transform(sequence(0, cast(rand()*1000 as int) + 1), x -> x * 22) ); 23/09/14 09:27:25 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 3) java.lang.IllegalArgumentException: requirement failed: Nondeterministic expression org.apache.spark.sql.catalyst.expressions.Rand should be initialized before eval. at scala.Predef$.require(Predef.scala:281) at org.apache.spark.sql.catalyst.expressions.Nondeterministic.eval(Expression.scala:497) at org.apache.spark.sql.catalyst.expressions.Nondeterministic.eval$(Expression.scala:495) at org.apache.spark.sql.catalyst.expressions.RDG.eval(randomExpressions.scala:35) at org.apache.spark.sql.catalyst.expressions.BinaryArithmetic.eval(arithmetic.scala:384) at org.apache.spark.sql.catalyst.expressions.UnaryExpression.eval(Expression.scala:543) at org.apache.spark.sql.catalyst.expressions.BinaryArithmetic.eval(arithmetic.scala:384) at org.apache.spark.sql.catalyst.expressions.Sequence.eval(collectionOperations.scala:3062) at org.apache.spark.sql.catalyst.expressions.SimpleHigherOrderFunction.eval(higherOrderFunctions.scala:275) at org.apache.spark.sql.catalyst.expressions.SimpleHigherOrderFunction.eval$(higherOrderFunctions.scala:274) at org.apache.spark.sql.catalyst.expressions.ArrayTransform.eval(higherOrderFunctions.scala:308) at org.apache.spark.sql.catalyst.expressions.ExplodeBase.eval(generators.scala:375) at org.apache.spark.sql.execution.GenerateExec.$anonfun$doExecute$8(GenerateExec.scala:108) ... ``` However, this query succeeds: ``` select * from explode( sequence(0, cast(rand()*1000 as int) + 1) ); 0 1 2 3 ... 
801 802 803 ``` The difference is that `transform` turns off whole-stage codegen, which exposes a bug in `GenerateExec` in which the non-deterministic expression passed to the generator function is not initialized before being used. This PR fixes the bug in `GenerateExec`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42933 from bersprockets/nondeterm_issue. Lead-authored-by: Bruce Robbins Co-authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit e097f916a2769dfe82bfd216fedcd6962e8280c8) Signed-off-by: Hyukjin Kwon --- .../org/apache/spark/sql/execution/GenerateExec.scala | 4 ++++ .../org/apache/spark/sql/GeneratorFunctionSuite.scala | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala index f6dbf5fda1816..b99361437e0d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/GenerateExec.scala @@ -78,6 +78,10 @@ case class GenerateExec( // boundGenerator.terminate() should be triggered after all of the rows in the partition val numOutputRows = longMetric("numOutputRows") child.execute().mapPartitionsWithIndexInternal { (index, iter) => + boundGenerator.foreach { + case n: Nondeterministic => n.initialize(index) + case _ => + } val generatorNullRow = new GenericInternalRow(generator.elementSchema.length) val rows = if (requiredChildOutput.nonEmpty) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala index abec582d43a30..0746a4b92af29 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/GeneratorFunctionSuite.scala @@ -536,6 +536,13 @@ class GeneratorFunctionSuite extends QueryTest with SharedSparkSession { checkAnswer(df, Row(1, 1) :: Row(1, 2) :: Row(2, 2) :: Row(2, 3) :: Row(3, null) :: Nil) } + + test("SPARK-45171: Handle evaluated nondeterministic expression") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") { + val df = sql("select explode(array(rand(0)))") + checkAnswer(df, Row(0.7604953758285915d)) + } + } } case class EmptyGenerator() extends Generator with LeafLike[Expression] { From a3f50e742506e07473c281255d1b13ab8ae78cd6 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Sat, 16 Sep 2023 09:04:38 -0500 Subject: [PATCH 014/521] [SPARK-45127][DOCS] Exclude README.md from document build ### What changes were proposed in this pull request? The pr aims to exclude `README.md` from document build. ### Why are the changes needed? - Currently, our document `README.html` does not have any CSS style applied to it, as shown below: https://spark.apache.org/docs/latest/README.html image **If we do not intend to display the above page to users, we should remove it during the document build process.** - As we saw in the project `spark-website`, it has already set the following configuration: https://github.com/apache/spark-website/blob/642d1fb834817014e1799e73882d53650c1c1662/_config.yml#L7 image Let's stay consistent. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Manually test. 
After this pr, the README.html file will no longer be generated ``` (base) panbingkun:~/Developer/spark/spark-community/docs/_site$ls -al README.html ls: README.html: No such file or directory ``` - Pass GA. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42883 from panbingkun/SPARK-45127. Authored-by: panbingkun Signed-off-by: Sean Owen (cherry picked from commit 804f741453fb146b5261084fa3baf26631badb79) Signed-off-by: Sean Owen --- docs/_config.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/_config.yml b/docs/_config.yml index afe015b2972da..e346833722b93 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -46,3 +46,5 @@ DOCSEARCH_SCRIPT: | }); permalink: 404.html + +exclude: ['README.md'] From 723a85eb2dffa69571cba841380eb759a9b89321 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Sun, 17 Sep 2023 11:16:24 +0300 Subject: [PATCH 015/521] [SPARK-45078][SQL] Fix `array_insert` ImplicitCastInputTypes not work ### What changes were proposed in this pull request? This PR fix call `array_insert` with different type between array and insert column, will throw exception. Sometimes it should be execute successed. eg: ```sql select array_insert(array(1), 2, cast(2 as tinyint)) ``` The `ImplicitCastInputTypes` in `ArrayInsert` always return empty array at now. So that Spark can not convert `tinyint` to `int`. ### Why are the changes needed? Fix error behavior in `array_insert` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new test. ### Was this patch authored or co-authored using generative AI tooling? No Closes #42951 from Hisoka-X/SPARK-45078_arrayinsert_type_mismatch. Authored-by: Jia Fan Signed-off-by: Max Gekk (cherry picked from commit e84c66db60c78476806161479344cd32a7606ab1) Signed-off-by: Max Gekk --- .../sql/catalyst/expressions/collectionOperations.scala | 1 - .../sql-tests/analyzer-results/ansi/array.sql.out | 7 +++++++ .../resources/sql-tests/analyzer-results/array.sql.out | 7 +++++++ sql/core/src/test/resources/sql-tests/inputs/array.sql | 1 + .../test/resources/sql-tests/results/ansi/array.sql.out | 8 ++++++++ .../src/test/resources/sql-tests/results/array.sql.out | 8 ++++++++ 6 files changed, 31 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index fe9c4015c15ec..ade4a6c5be722 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -4711,7 +4711,6 @@ case class ArrayInsert( } case (e1, e2, e3) => Seq.empty } - Seq.empty } override def checkInputDataTypes(): TypeCheckResult = { diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out index cd101c7a524a1..6fc308157933f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/array.sql.out @@ -531,6 +531,13 @@ Project [array_insert(array(2, 3, cast(null as int), 4), -5, 1, false) AS array_ +- OneRowRelation +-- !query +select array_insert(array(1), 2, cast(2 as tinyint)) +-- !query analysis +Project [array_insert(array(1), 2, cast(cast(2 as tinyint) as int), false) AS array_insert(array(1), 2, CAST(2 
AS TINYINT))#x] ++- OneRowRelation + + -- !query set spark.sql.legacy.negativeIndexInArrayInsert=true -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out index 8279fb3362e54..e0585b77cb6bd 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/array.sql.out @@ -531,6 +531,13 @@ Project [array_insert(array(2, 3, cast(null as int), 4), -5, 1, false) AS array_ +- OneRowRelation +-- !query +select array_insert(array(1), 2, cast(2 as tinyint)) +-- !query analysis +Project [array_insert(array(1), 2, cast(cast(2 as tinyint) as int), false) AS array_insert(array(1), 2, CAST(2 AS TINYINT))#x] ++- OneRowRelation + + -- !query set spark.sql.legacy.negativeIndexInArrayInsert=true -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/array.sql b/sql/core/src/test/resources/sql-tests/inputs/array.sql index 48edc6b474254..52a0906ea7392 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/array.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/array.sql @@ -141,6 +141,7 @@ select array_insert(array(1, 2, 3, NULL), cast(NULL as INT), 4); select array_insert(array(1, 2, 3, NULL), 4, cast(NULL as INT)); select array_insert(array(2, 3, NULL, 4), 5, 5); select array_insert(array(2, 3, NULL, 4), -5, 1); +select array_insert(array(1), 2, cast(2 as tinyint)); set spark.sql.legacy.negativeIndexInArrayInsert=true; select array_insert(array(1, 3, 4), -2, 2); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out index 03be0f9d84b1b..49e18411ffa37 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/array.sql.out @@ -659,6 +659,14 @@ struct> [1,2,3,null,4] +-- !query +select array_insert(array(1), 2, cast(2 as tinyint)) +-- !query schema +struct> +-- !query output +[1,2] + + -- !query set spark.sql.legacy.negativeIndexInArrayInsert=true -- !query schema diff --git a/sql/core/src/test/resources/sql-tests/results/array.sql.out b/sql/core/src/test/resources/sql-tests/results/array.sql.out index 9dbf4fbebc20b..e568f5fa7796d 100644 --- a/sql/core/src/test/resources/sql-tests/results/array.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/array.sql.out @@ -540,6 +540,14 @@ struct> [1,2,3,null,4] +-- !query +select array_insert(array(1), 2, cast(2 as tinyint)) +-- !query schema +struct> +-- !query output +[1,2] + + -- !query set spark.sql.legacy.negativeIndexInArrayInsert=true -- !query schema From 84a053e72ac9d9cfc91bab777cea94958d3a91da Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 17 Sep 2023 10:34:23 -0700 Subject: [PATCH 016/521] [SPARK-45187][CORE] Fix `WorkerPage` to use the same pattern for `logPage` urls ### What changes were proposed in this pull request? This PR aims to use the same pattern for `logPage` urls of `WorkerPage` to make it work consistently when `spark.ui.reverseProxy=true`. ### Why are the changes needed? Since Apache Spark 3.2.0 (SPARK-34635, #31753), Apache Spark adds trailing slashes to reduce redirections for `logPage`. ```scala s"$workerUrlRef/logPage?driverId=$driverId&logType=stdout") s"$workerUrlRef/logPage/?driverId=$driverId&logType=stdout") ... stdout - stderr + stdout + stderr ``` ### Does this PR introduce _any_ user-facing change? No. 
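For illustration only, not part of the original commit message: the two URL shapes in question, with `workerUrlRef` and `driverId` as made-up placeholder values. Per the SPARK-34635 rationale quoted above, the trailing-slash form is served directly instead of going through an extra redirect, which is what can break behind a reverse proxy.

```scala
// Placeholder values, assumed only for this sketch.
val workerUrlRef = "https://proxy.example.com/proxy/worker-20230917-0001"
val driverId = "driver-20230917-120000-0001"
// Old driver-row link: no trailing slash, answered with a redirect to "logPage/".
val before = s"$workerUrlRef/logPage?driverId=$driverId&logType=stdout"
// Fixed driver-row link: same "logPage/?" pattern used elsewhere, no extra redirect.
val after = s"$workerUrlRef/logPage/?driverId=$driverId&logType=stdout"
```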
### How was this patch tested? Manual tests because it requires a reverse proxy. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42959 from dongjoon-hyun/SPARK-45187. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit f8f2735426ee7ad3d7a1f5bd07e72643516f4a35) Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala index 3171d3f16e8a0..e740b328dd7b9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerPage.scala @@ -212,8 +212,8 @@ private[ui] class WorkerPage(parent: WorkerWebUI) extends WebUIPage("") { {formatResourcesAddresses(driver.resources)} - stdout - stderr + stdout + stderr {driver.finalException.getOrElse("")} From 60073f318313ab2329ea1504ef7538641433852e Mon Sep 17 00:00:00 2001 From: Martin Grund Date: Tue, 19 Sep 2023 08:32:21 +0900 Subject: [PATCH 017/521] [SPARK-45167][CONNECT][PYTHON][3.5] Python client must call `release_all` ### What changes were proposed in this pull request? Cherry-pick of https://github.com/apache/spark/pull/42929 Previously the Python client would not call `release_all` after fetching all results and leaving the query dangling. The query would then be removed after the five minute timeout. This patch adds proper testing for calling release all and release until. In addition it fixes a test race condition where we would close the SparkSession which would in turn close the GRPC channel which might have dangling async release calls hanging. ### Why are the changes needed? Stability ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? new UT ### Was this patch authored or co-authored using generative AI tooling? No Closes #42973 from grundprinzip/SPARK-45167-3.5. Authored-by: Martin Grund Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/connect/client/core.py | 1 + python/pyspark/sql/connect/client/reattach.py | 37 +++- .../sql/tests/connect/client/test_client.py | 195 +++++++++++++++++- 3 files changed, 226 insertions(+), 7 deletions(-) diff --git a/python/pyspark/sql/connect/client/core.py b/python/pyspark/sql/connect/client/core.py index 7b3299d123b97..7b1aafbefebbe 100644 --- a/python/pyspark/sql/connect/client/core.py +++ b/python/pyspark/sql/connect/client/core.py @@ -1005,6 +1005,7 @@ def close(self) -> None: """ Close the channel. 
""" + ExecutePlanResponseReattachableIterator.shutdown() self._channel.close() self._closed = True diff --git a/python/pyspark/sql/connect/client/reattach.py b/python/pyspark/sql/connect/client/reattach.py index 7e1e722d5fd8a..e58864b965bd9 100644 --- a/python/pyspark/sql/connect/client/reattach.py +++ b/python/pyspark/sql/connect/client/reattach.py @@ -21,7 +21,9 @@ import warnings import uuid from collections.abc import Generator -from typing import Optional, Dict, Any, Iterator, Iterable, Tuple, Callable, cast +from typing import Optional, Dict, Any, Iterator, Iterable, Tuple, Callable, cast, Type, ClassVar +from multiprocessing import RLock +from multiprocessing.synchronize import RLock as RLockBase from multiprocessing.pool import ThreadPool import os @@ -53,7 +55,30 @@ class ExecutePlanResponseReattachableIterator(Generator): ReleaseExecute RPCs that instruct the server to release responses that it already processed. """ - _release_thread_pool = ThreadPool(os.cpu_count() if os.cpu_count() else 8) + # Lock to manage the pool + _lock: ClassVar[RLockBase] = RLock() + _release_thread_pool: Optional[ThreadPool] = ThreadPool(os.cpu_count() if os.cpu_count() else 8) + + @classmethod + def shutdown(cls: Type["ExecutePlanResponseReattachableIterator"]) -> None: + """ + When the channel is closed, this method will be called before, to make sure all + outstanding calls are closed. + """ + with cls._lock: + if cls._release_thread_pool is not None: + cls._release_thread_pool.close() + cls._release_thread_pool.join() + cls._release_thread_pool = None + + @classmethod + def _initialize_pool_if_necessary(cls: Type["ExecutePlanResponseReattachableIterator"]) -> None: + """ + If the processing pool for the release calls is None, initialize the pool exactly once. 
+ """ + with cls._lock: + if cls._release_thread_pool is None: + cls._release_thread_pool = ThreadPool(os.cpu_count() if os.cpu_count() else 8) def __init__( self, @@ -62,6 +87,7 @@ def __init__( retry_policy: Dict[str, Any], metadata: Iterable[Tuple[str, str]], ): + ExecutePlanResponseReattachableIterator._initialize_pool_if_necessary() self._request = request self._retry_policy = retry_policy if request.operation_id: @@ -111,7 +137,6 @@ def send(self, value: Any) -> pb2.ExecutePlanResponse: self._last_returned_response_id = ret.response_id if ret.HasField("result_complete"): - self._result_complete = True self._release_all() else: self._release_until(self._last_returned_response_id) @@ -190,7 +215,8 @@ def target() -> None: except Exception as e: warnings.warn(f"ReleaseExecute failed with exception: {e}.") - ExecutePlanResponseReattachableIterator._release_thread_pool.apply_async(target) + if ExecutePlanResponseReattachableIterator._release_thread_pool is not None: + ExecutePlanResponseReattachableIterator._release_thread_pool.apply_async(target) def _release_all(self) -> None: """ @@ -218,7 +244,8 @@ def target() -> None: except Exception as e: warnings.warn(f"ReleaseExecute failed with exception: {e}.") - ExecutePlanResponseReattachableIterator._release_thread_pool.apply_async(target) + if ExecutePlanResponseReattachableIterator._release_thread_pool is not None: + ExecutePlanResponseReattachableIterator._release_thread_pool.apply_async(target) self._result_complete = True def _call_iter(self, iter_fun: Callable) -> Any: diff --git a/python/pyspark/sql/tests/connect/client/test_client.py b/python/pyspark/sql/tests/connect/client/test_client.py index 98f68767b8bca..cf43fb16df7a7 100644 --- a/python/pyspark/sql/tests/connect/client/test_client.py +++ b/python/pyspark/sql/tests/connect/client/test_client.py @@ -17,14 +17,20 @@ import unittest import uuid -from typing import Optional +from collections.abc import Generator +from typing import Optional, Any + +import grpc from pyspark.sql.connect.client import SparkConnectClient, ChannelBuilder import pyspark.sql.connect.proto as proto from pyspark.testing.connectutils import should_test_connect, connect_requirement_message from pyspark.sql.connect.client.core import Retrying -from pyspark.sql.connect.client.reattach import RetryException +from pyspark.sql.connect.client.reattach import ( + RetryException, + ExecutePlanResponseReattachableIterator, +) if should_test_connect: import pandas as pd @@ -120,6 +126,191 @@ def test_channel_builder_with_session(self): self.assertEqual(client._session_id, chan.session_id) +@unittest.skipIf(not should_test_connect, connect_requirement_message) +class SparkConnectClientReattachTestCase(unittest.TestCase): + def setUp(self) -> None: + self.request = proto.ExecutePlanRequest() + self.policy = { + "max_retries": 3, + "backoff_multiplier": 4.0, + "initial_backoff": 10, + "max_backoff": 10, + "jitter": 10, + "min_jitter_threshold": 10, + } + self.response = proto.ExecutePlanResponse( + response_id="1", + ) + self.finished = proto.ExecutePlanResponse( + result_complete=proto.ExecutePlanResponse.ResultComplete(), + response_id="2", + ) + + def _stub_with(self, execute=None, attach=None): + return MockSparkConnectStub( + execute_ops=ResponseGenerator(execute) if execute is not None else None, + attach_ops=ResponseGenerator(attach) if attach is not None else None, + ) + + def assertEventually(self, callable, timeout_ms=1000): + """Helper method that will continuously evaluate the callable to not raise an + 
exception.""" + import time + + limit = time.monotonic_ns() + timeout_ms * 1000 * 1000 + while time.monotonic_ns() < limit: + try: + callable() + break + except Exception: + time.sleep(0.1) + callable() + + def test_basic_flow(self): + stub = self._stub_with([self.response, self.finished]) + ite = ExecutePlanResponseReattachableIterator(self.request, stub, self.policy, []) + for b in ite: + pass + + def check_all(): + self.assertEqual(0, stub.attach_calls) + self.assertEqual(1, stub.release_until_calls) + self.assertEqual(1, stub.release_calls) + self.assertEqual(1, stub.execute_calls) + + self.assertEventually(check_all, timeout_ms=1000) + + def test_fail_during_execute(self): + def fatal(): + raise TestException("Fatal") + + stub = self._stub_with([self.response, fatal]) + with self.assertRaises(TestException): + ite = ExecutePlanResponseReattachableIterator(self.request, stub, self.policy, []) + for b in ite: + pass + + def check(): + self.assertEqual(0, stub.attach_calls) + self.assertEqual(1, stub.release_calls) + self.assertEqual(1, stub.release_until_calls) + self.assertEqual(1, stub.execute_calls) + + self.assertEventually(check, timeout_ms=1000) + + def test_fail_and_retry_during_execute(self): + def non_fatal(): + raise TestException("Non Fatal", grpc.StatusCode.UNAVAILABLE) + + stub = self._stub_with( + [self.response, non_fatal], [self.response, self.response, self.finished] + ) + ite = ExecutePlanResponseReattachableIterator(self.request, stub, self.policy, []) + for b in ite: + pass + + def check(): + self.assertEqual(1, stub.attach_calls) + self.assertEqual(1, stub.release_calls) + self.assertEqual(3, stub.release_until_calls) + self.assertEqual(1, stub.execute_calls) + + self.assertEventually(check, timeout_ms=1000) + + def test_fail_and_retry_during_reattach(self): + count = 0 + + def non_fatal(): + nonlocal count + if count < 2: + count += 1 + raise TestException("Non Fatal", grpc.StatusCode.UNAVAILABLE) + else: + return proto.ExecutePlanResponse() + + stub = self._stub_with( + [self.response, non_fatal], [self.response, non_fatal, self.response, self.finished] + ) + ite = ExecutePlanResponseReattachableIterator(self.request, stub, self.policy, []) + for b in ite: + pass + + def check(): + self.assertEqual(2, stub.attach_calls) + self.assertEqual(3, stub.release_until_calls) + self.assertEqual(1, stub.release_calls) + self.assertEqual(1, stub.execute_calls) + + self.assertEventually(check, timeout_ms=1000) + + +class TestException(grpc.RpcError, grpc.Call): + """Exception mock to test retryable exceptions.""" + + def __init__(self, msg, code=grpc.StatusCode.INTERNAL): + self.msg = msg + self._code = code + + def code(self): + return self._code + + def __str__(self): + return self.msg + + def trailing_metadata(self): + return () + + +class ResponseGenerator(Generator): + """This class is used to generate values that are returned by the streaming + iterator of the GRPC stub.""" + + def __init__(self, funs): + self._funs = funs + self._iterator = iter(self._funs) + + def send(self, value: Any) -> proto.ExecutePlanResponse: + val = next(self._iterator) + if callable(val): + return val() + else: + return val + + def throw(self, type: Any = None, value: Any = None, traceback: Any = None) -> Any: + super().throw(type, value, traceback) + + def close(self) -> None: + return super().close() + + +class MockSparkConnectStub: + """Simple mock class for the GRPC stub used by the re-attachable execution.""" + + def __init__(self, execute_ops=None, attach_ops=None): + self._execute_ops 
= execute_ops + self._attach_ops = attach_ops + # Call counters + self.execute_calls = 0 + self.release_calls = 0 + self.release_until_calls = 0 + self.attach_calls = 0 + + def ExecutePlan(self, *args, **kwargs): + self.execute_calls += 1 + return self._execute_ops + + def ReattachExecute(self, *args, **kwargs): + self.attach_calls += 1 + return self._attach_ops + + def ReleaseExecute(self, req: proto.ReleaseExecuteRequest, *args, **kwargs): + if req.HasField("release_all"): + self.release_calls += 1 + elif req.HasField("release_until"): + print("increment") + self.release_until_calls += 1 + + class MockService: # Simplest mock of the SparkConnectService. # If this needs more complex logic, it needs to be replaced with Python mocking. From 2a9dd2b3968da7c2e96c502aaf4c158ee782e5f4 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 18 Sep 2023 13:46:34 +0900 Subject: [PATCH 018/521] [SPARK-45167][CONNECT][PYTHON][FOLLOW-UP] Use lighter threading Rlock, and use the existing eventually util function This PR is a followup of https://github.com/apache/spark/pull/42929 that: - Use lighter threading `Rlock` instead of multithreading `Rlock`. Multiprocessing does not work with PySpark due to the ser/de problem for socket connections, and many others. - Use the existing eventually util function `pyspark.testing.eventually` instead of `assertEventually` to deduplicate code. Mainly for code clean-up. No. Existing tests should pass them. No. Closes #42965 from HyukjinKwon/SPARK-45167-followup. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit d5ff04da217df483d27011f6e38417df2eaa42bd) Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/connect/client/reattach.py | 5 ++-- .../sql/tests/connect/client/test_client.py | 23 ++++--------------- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/python/pyspark/sql/connect/client/reattach.py b/python/pyspark/sql/connect/client/reattach.py index e58864b965bd9..6addb5bd2c652 100644 --- a/python/pyspark/sql/connect/client/reattach.py +++ b/python/pyspark/sql/connect/client/reattach.py @@ -18,12 +18,11 @@ check_dependencies(__name__) +from threading import RLock import warnings import uuid from collections.abc import Generator from typing import Optional, Dict, Any, Iterator, Iterable, Tuple, Callable, cast, Type, ClassVar -from multiprocessing import RLock -from multiprocessing.synchronize import RLock as RLockBase from multiprocessing.pool import ThreadPool import os @@ -56,7 +55,7 @@ class ExecutePlanResponseReattachableIterator(Generator): """ # Lock to manage the pool - _lock: ClassVar[RLockBase] = RLock() + _lock: ClassVar[RLock] = RLock() _release_thread_pool: Optional[ThreadPool] = ThreadPool(os.cpu_count() if os.cpu_count() else 8) @classmethod diff --git a/python/pyspark/sql/tests/connect/client/test_client.py b/python/pyspark/sql/tests/connect/client/test_client.py index cf43fb16df7a7..93b7006799b30 100644 --- a/python/pyspark/sql/tests/connect/client/test_client.py +++ b/python/pyspark/sql/tests/connect/client/test_client.py @@ -25,6 +25,7 @@ from pyspark.sql.connect.client import SparkConnectClient, ChannelBuilder import pyspark.sql.connect.proto as proto from pyspark.testing.connectutils import should_test_connect, connect_requirement_message +from pyspark.testing.utils import eventually from pyspark.sql.connect.client.core import Retrying from pyspark.sql.connect.client.reattach import ( @@ -152,20 +153,6 @@ def _stub_with(self, execute=None, attach=None): attach_ops=ResponseGenerator(attach) if attach is 
not None else None, ) - def assertEventually(self, callable, timeout_ms=1000): - """Helper method that will continuously evaluate the callable to not raise an - exception.""" - import time - - limit = time.monotonic_ns() + timeout_ms * 1000 * 1000 - while time.monotonic_ns() < limit: - try: - callable() - break - except Exception: - time.sleep(0.1) - callable() - def test_basic_flow(self): stub = self._stub_with([self.response, self.finished]) ite = ExecutePlanResponseReattachableIterator(self.request, stub, self.policy, []) @@ -178,7 +165,7 @@ def check_all(): self.assertEqual(1, stub.release_calls) self.assertEqual(1, stub.execute_calls) - self.assertEventually(check_all, timeout_ms=1000) + eventually(timeout=1, catch_assertions=True)(check_all)() def test_fail_during_execute(self): def fatal(): @@ -196,7 +183,7 @@ def check(): self.assertEqual(1, stub.release_until_calls) self.assertEqual(1, stub.execute_calls) - self.assertEventually(check, timeout_ms=1000) + eventually(timeout=1, catch_assertions=True)(check)() def test_fail_and_retry_during_execute(self): def non_fatal(): @@ -215,7 +202,7 @@ def check(): self.assertEqual(3, stub.release_until_calls) self.assertEqual(1, stub.execute_calls) - self.assertEventually(check, timeout_ms=1000) + eventually(timeout=1, catch_assertions=True)(check)() def test_fail_and_retry_during_reattach(self): count = 0 @@ -241,7 +228,7 @@ def check(): self.assertEqual(1, stub.release_calls) self.assertEqual(1, stub.execute_calls) - self.assertEventually(check, timeout_ms=1000) + eventually(timeout=1, catch_assertions=True)(check)() class TestException(grpc.RpcError, grpc.Call): From 555c8def51e5951c7bf5165a332795e9e330ec9d Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 19 Sep 2023 10:18:18 +0900 Subject: [PATCH 019/521] Revert "Revert "[SPARK-44742][PYTHON][DOCS] Add Spark version drop down to the PySpark doc site"" This reverts commit bbe12e148eb1f289cfb1f4412525f4c4381c10a9. 
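
For context, the restored drop-down is plain pydata-sphinx-theme wiring: a static `versions.json` under `_static/`, a `version-switcher.html` navbar template, and two settings in `conf.py`. A condensed sketch of the `conf.py` side is below; the values mirror the diff that follows, so treat it as an illustration rather than the authoritative file contents:

```python
# python/docs/source/conf.py (excerpt) -- wiring for the PySpark docs version drop-down
html_theme = 'pydata_sphinx_theme'

html_context = {
    # JSON file listing the selectable doc versions, served from _static/.
    # Each entry looks like {"name": "3.4.1", "version": "3.4.1"}.
    "switcher_json_url": "_static/versions.json",
    # Link pattern for each entry; {version} is substituted per entry.
    "switcher_template_url": "https://spark.apache.org/docs/{version}/api/python/index.html",
}

# Render the switcher template at the right-hand end of the navbar.
html_theme_options = {
    "navbar_end": ["version-switcher"]
}
```

The `version-switcher.html` template and the `#version_switcher` CSS hooks it relies on are re-added by the same commit, as shown in the diff below.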
--- python/docs/source/_static/css/pyspark.css | 13 ++++ python/docs/source/_static/versions.json | 22 ++++++ .../source/_templates/version-switcher.html | 77 +++++++++++++++++++ python/docs/source/conf.py | 9 ++- 4 files changed, 120 insertions(+), 1 deletion(-) create mode 100644 python/docs/source/_static/versions.json create mode 100644 python/docs/source/_templates/version-switcher.html diff --git a/python/docs/source/_static/css/pyspark.css b/python/docs/source/_static/css/pyspark.css index 89b7c65f27a51..ccfe60f2bca64 100644 --- a/python/docs/source/_static/css/pyspark.css +++ b/python/docs/source/_static/css/pyspark.css @@ -95,3 +95,16 @@ u.bd-sidebar .nav>li>ul>.active:hover>a,.bd-sidebar .nav>li>ul>.active>a { .spec_table tr, td, th { border-top: none!important; } + +/* Styling to the version dropdown */ +#version-button { + padding-left: 0.2rem; + padding-right: 3.2rem; +} + +#version_switcher { + height: auto; + max-height: 300px; + width: 165px; + overflow-y: auto; +} diff --git a/python/docs/source/_static/versions.json b/python/docs/source/_static/versions.json new file mode 100644 index 0000000000000..3d0bd14818064 --- /dev/null +++ b/python/docs/source/_static/versions.json @@ -0,0 +1,22 @@ +[ + { + "name": "3.4.1", + "version": "3.4.1" + }, + { + "name": "3.4.0", + "version": "3.4.0" + }, + { + "name": "3.3.2", + "version": "3.3.2" + }, + { + "name": "3.3.1", + "version": "3.3.1" + }, + { + "name": "3.3.0", + "version": "3.3.0" + } +] diff --git a/python/docs/source/_templates/version-switcher.html b/python/docs/source/_templates/version-switcher.html new file mode 100644 index 0000000000000..16c443229f4be --- /dev/null +++ b/python/docs/source/_templates/version-switcher.html @@ -0,0 +1,77 @@ + + + + + diff --git a/python/docs/source/conf.py b/python/docs/source/conf.py index 38c331048e7b6..0f57cb37ceeb1 100644 --- a/python/docs/source/conf.py +++ b/python/docs/source/conf.py @@ -177,10 +177,17 @@ # a list of builtin themes. html_theme = 'pydata_sphinx_theme' +html_context = { + "switcher_json_url": "_static/versions.json", + "switcher_template_url": "https://spark.apache.org/docs/{version}/api/python/index.html", +} + # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +html_theme_options = { + "navbar_end": ["version-switcher"] +} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] From 6a498087361ecbd653821fc283b9ea0fa703c820 Mon Sep 17 00:00:00 2001 From: Giambattista Bloisi Date: Mon, 18 Sep 2023 21:37:09 -0700 Subject: [PATCH 020/521] [SPARK-44910][SQL] Encoders.bean does not support superclasses with generic type arguments ### What changes were proposed in this pull request? This pull request adds Encoders.bean support for beans having a superclass declared with generic type arguments. For example: ``` class JavaBeanWithGenericsA { public T getPropertyA() { return null; } public void setPropertyA(T a) { } } class JavaBeanWithGenericBase extends JavaBeanWithGenericsA { } Encoders.bean(JavaBeanWithGenericBase.class); // Exception ``` That feature had to be part of [PR 42327](https://github.com/apache/spark/commit/1f5d78b5952fcc6c7d36d3338a5594070e3a62dd) but was missing as I was focusing on nested beans only (hvanhovell ) ### Why are the changes needed? 
JavaTypeInference.encoderFor did not solve TypeVariable objects for superclasses so when managing a case like in the example above an exception was thrown. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests have been extended, new specific tests have been added ### Was this patch authored or co-authored using generative AI tooling? No Closes #42634 from gbloisi-openaire/SPARK-44910. Lead-authored-by: Giambattista Bloisi Co-authored-by: gbloisi-openaire <141144100+gbloisi-openaire@users.noreply.github.com> Signed-off-by: Dongjoon Hyun (cherry picked from commit 7e14c8cc33f0ed0a9c53a888e8a3b17dd2a5d493) Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/JavaTypeInference.scala | 5 +- ...erics.java => JavaTypeInferenceBeans.java} | 51 +++++++++++++++++-- .../sql/catalyst/JavaTypeInferenceSuite.scala | 41 +++++++++++++-- 3 files changed, 88 insertions(+), 9 deletions(-) rename sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/{JavaBeanWithGenerics.java => JavaTypeInferenceBeans.java} (54%) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index 3d536b735db59..191ccc5254404 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -130,10 +130,13 @@ object JavaTypeInference { // TODO: we should only collect properties that have getter and setter. However, some tests // pass in scala case class as java bean class which doesn't have getter and setter. val properties = getJavaBeanReadableProperties(c) + // add type variables from inheritance hierarchy of the class + val classTV = JavaTypeUtils.getTypeArguments(c, classOf[Object]).asScala.toMap ++ + typeVariables // Note that the fields are ordered by name. val fields = properties.map { property => val readMethod = property.getReadMethod - val encoder = encoderFor(readMethod.getGenericReturnType, seenTypeSet + c, typeVariables) + val encoder = encoderFor(readMethod.getGenericReturnType, seenTypeSet + c, classTV) // The existence of `javax.annotation.Nonnull`, means this field is not nullable. 
val hasNonNull = readMethod.isAnnotationPresent(classOf[Nonnull]) EncoderField( diff --git a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/JavaBeanWithGenerics.java b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/JavaTypeInferenceBeans.java similarity index 54% rename from sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/JavaBeanWithGenerics.java rename to sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/JavaTypeInferenceBeans.java index b84a3122cf84c..cc3540717ee7d 100644 --- a/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/JavaBeanWithGenerics.java +++ b/sql/catalyst/src/test/java/org/apache/spark/sql/catalyst/JavaTypeInferenceBeans.java @@ -17,25 +17,66 @@ package org.apache.spark.sql.catalyst; -class JavaBeanWithGenerics { +public class JavaTypeInferenceBeans { + + static class JavaBeanWithGenericsA { + public T getPropertyA() { + return null; + } + + public void setPropertyA(T a) { + + } + } + + static class JavaBeanWithGenericsAB extends JavaBeanWithGenericsA { + public T getPropertyB() { + return null; + } + + public void setPropertyB(T a) { + + } + } + + static class JavaBeanWithGenericsABC extends JavaBeanWithGenericsAB { + public T getPropertyC() { + return null; + } + + public void setPropertyC(T a) { + + } + } + + static class JavaBeanWithGenerics { private A attribute; private T value; public A getAttribute() { - return attribute; + return attribute; } public void setAttribute(A attribute) { - this.attribute = attribute; + this.attribute = attribute; } public T getValue() { - return value; + return value; } public void setValue(T value) { - this.value = value; + this.value = value; } + } + + static class JavaBeanWithGenericBase extends JavaBeanWithGenerics { + + } + + static class JavaBeanWithGenericHierarchy extends JavaBeanWithGenericsABC { + + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/JavaTypeInferenceSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/JavaTypeInferenceSuite.scala index 6439997609766..f7c1043d1cb8f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/JavaTypeInferenceSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/JavaTypeInferenceSuite.scala @@ -24,6 +24,7 @@ import scala.beans.{BeanProperty, BooleanBeanProperty} import scala.reflect.{classTag, ClassTag} import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.JavaTypeInferenceBeans.{JavaBeanWithGenericBase, JavaBeanWithGenericHierarchy, JavaBeanWithGenericsABC} import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoder, UDTCaseClass, UDTForCaseClass} import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders._ import org.apache.spark.sql.types.{DecimalType, MapType, Metadata, StringType, StructField, StructType} @@ -66,7 +67,8 @@ class LeafBean { @BeanProperty var period: java.time.Period = _ @BeanProperty var enum: java.time.Month = _ @BeanProperty val readOnlyString = "read-only" - @BeanProperty var genericNestedBean: JavaBeanWithGenerics[String, String] = _ + @BeanProperty var genericNestedBean: JavaBeanWithGenericBase = _ + @BeanProperty var genericNestedBean2: JavaBeanWithGenericsABC[Integer] = _ var nonNullString: String = "value" @javax.annotation.Nonnull @@ -186,8 +188,18 @@ class JavaTypeInferenceSuite extends SparkFunSuite { encoderField("duration", DayTimeIntervalEncoder), encoderField("enum", JavaEnumEncoder(classTag[java.time.Month])), encoderField("genericNestedBean", JavaBeanEncoder( - 
ClassTag(classOf[JavaBeanWithGenerics[String, String]]), - Seq(encoderField("attribute", StringEncoder), encoderField("value", StringEncoder)))), + ClassTag(classOf[JavaBeanWithGenericBase]), + Seq( + encoderField("attribute", StringEncoder), + encoderField("value", StringEncoder) + ))), + encoderField("genericNestedBean2", JavaBeanEncoder( + ClassTag(classOf[JavaBeanWithGenericsABC[Integer]]), + Seq( + encoderField("propertyA", StringEncoder), + encoderField("propertyB", BoxedLongEncoder), + encoderField("propertyC", BoxedIntEncoder) + ))), encoderField("instant", STRICT_INSTANT_ENCODER), encoderField("localDate", STRICT_LOCAL_DATE_ENCODER), encoderField("localDateTime", LocalDateTimeEncoder), @@ -224,4 +236,27 @@ class JavaTypeInferenceSuite extends SparkFunSuite { )) assert(encoder === expected) } + + test("SPARK-44910: resolve bean with generic base class") { + val encoder = + JavaTypeInference.encoderFor(classOf[JavaBeanWithGenericBase]) + val expected = + JavaBeanEncoder(ClassTag(classOf[JavaBeanWithGenericBase]), Seq( + encoderField("attribute", StringEncoder), + encoderField("value", StringEncoder) + )) + assert(encoder === expected) + } + + test("SPARK-44910: resolve bean with hierarchy of generic classes") { + val encoder = + JavaTypeInference.encoderFor(classOf[JavaBeanWithGenericHierarchy]) + val expected = + JavaBeanEncoder(ClassTag(classOf[JavaBeanWithGenericHierarchy]), Seq( + encoderField("propertyA", StringEncoder), + encoderField("propertyB", BoxedLongEncoder), + encoderField("propertyC", BoxedIntEncoder) + )) + assert(encoder === expected) + } } From f357f93fa2cf941c1f1e2745a10865bb12d5ab56 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 19 Sep 2023 16:56:38 +0800 Subject: [PATCH 021/521] [SPARK-45211][CONNECT] Eliminated ambiguous references in `CloseableIterator#apply` to fix Scala 2.13 daily test ### What changes were proposed in this pull request? This pr eliminated an ambiguous references in `org.apache.spark.sql.connect.client.CloseableIterator#apply` function to make the test case `abandoned query gets INVALID_HANDLE.OPERATION_ABANDONED error` can test pass with Scala 2.13. ### Why are the changes needed? `abandoned query gets INVALID_HANDLE.OPERATION_ABANDONED error` failed in the daily test of Scala 2.13: - https://github.com/apache/spark/actions/runs/6215331575/job/16868131377 image ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? 
- Pass GitHub Actions - Manual check run ``` dev/change-scala-version.sh 2.13 build/sbt "connect/testOnly org.apache.spark.sql.connect.execution.ReattachableExecuteSuite" -Pscala-2.13 ``` **Before** ``` [info] ReattachableExecuteSuite: [info] - reattach after initial RPC ends (2 seconds, 258 milliseconds) [info] - raw interrupted RPC results in INVALID_CURSOR.DISCONNECTED error (30 milliseconds) [info] - raw new RPC interrupts previous RPC with INVALID_CURSOR.DISCONNECTED error (21 milliseconds) [info] - client INVALID_CURSOR.DISCONNECTED error is retried when rpc sender gets interrupted (602 milliseconds) [info] - client INVALID_CURSOR.DISCONNECTED error is retried when other RPC preempts this one (637 milliseconds) [info] - abandoned query gets INVALID_HANDLE.OPERATION_ABANDONED error *** FAILED *** (70 milliseconds) [info] Expected exception org.apache.spark.SparkException to be thrown, but java.lang.StackOverflowError was thrown (ReattachableExecuteSuite.scala:172) [info] org.scalatest.exceptions.TestFailedException: [info] at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472) [info] at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471) [info] at org.scalatest.funsuite.AnyFunSuite.newAssertionFailedException(AnyFunSuite.scala:1564) [info] at org.scalatest.Assertions.intercept(Assertions.scala:756) [info] at org.scalatest.Assertions.intercept$(Assertions.scala:746) [info] at org.scalatest.funsuite.AnyFunSuite.intercept(AnyFunSuite.scala:1564) [info] at org.apache.spark.sql.connect.execution.ReattachableExecuteSuite.$anonfun$new$18(ReattachableExecuteSuite.scala:172) [info] at org.apache.spark.sql.connect.execution.ReattachableExecuteSuite.$anonfun$new$18$adapted(ReattachableExecuteSuite.scala:168) [info] at org.apache.spark.sql.connect.SparkConnectServerTest.withCustomBlockingStub(SparkConnectServerTest.scala:222) [info] at org.apache.spark.sql.connect.SparkConnectServerTest.withCustomBlockingStub$(SparkConnectServerTest.scala:216) [info] at org.apache.spark.sql.connect.execution.ReattachableExecuteSuite.withCustomBlockingStub(ReattachableExecuteSuite.scala:30) [info] at org.apache.spark.sql.connect.execution.ReattachableExecuteSuite.$anonfun$new$16(ReattachableExecuteSuite.scala:168) [info] at org.apache.spark.sql.connect.execution.ReattachableExecuteSuite.$anonfun$new$16$adapted(ReattachableExecuteSuite.scala:151) [info] at org.apache.spark.sql.connect.SparkConnectServerTest.withClient(SparkConnectServerTest.scala:199) [info] at org.apache.spark.sql.connect.SparkConnectServerTest.withClient$(SparkConnectServerTest.scala:191) [info] at org.apache.spark.sql.connect.execution.ReattachableExecuteSuite.withClient(ReattachableExecuteSuite.scala:30) [info] at org.apache.spark.sql.connect.execution.ReattachableExecuteSuite.$anonfun$new$15(ReattachableExecuteSuite.scala:151) [info] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.scala:18) [info] at org.scalatest.enablers.Timed$$anon$1.timeoutAfter(Timed.scala:127) [info] at org.scalatest.concurrent.TimeLimits$.failAfterImpl(TimeLimits.scala:282) [info] at org.scalatest.concurrent.TimeLimits.failAfter(TimeLimits.scala:231) [info] at org.scalatest.concurrent.TimeLimits.failAfter$(TimeLimits.scala:230) [info] at org.apache.spark.SparkFunSuite.failAfter(SparkFunSuite.scala:69) [info] at org.apache.spark.SparkFunSuite.$anonfun$test$2(SparkFunSuite.scala:155) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at 
org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:226) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:227) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:224) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:236) [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:236) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:218) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:69) [info] at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) [info] at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:69) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:269) [info] at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) [info] at scala.collection.immutable.List.foreach(List.scala:333) [info] at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) [info] at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396) [info] at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:269) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:268) [info] at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1564) [info] at org.scalatest.Suite.run(Suite.scala:1114) [info] at org.scalatest.Suite.run$(Suite.scala:1096) [info] at org.scalatest.funsuite.AnyFunSuite.org$scalatest$funsuite$AnyFunSuiteLike$$super$run(AnyFunSuite.scala:1564) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$run$1(AnyFunSuiteLike.scala:273) [info] at org.scalatest.SuperEngine.runImpl(Engine.scala:535) [info] at org.scalatest.funsuite.AnyFunSuiteLike.run(AnyFunSuiteLike.scala:273) [info] at org.scalatest.funsuite.AnyFunSuiteLike.run$(AnyFunSuiteLike.scala:272) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:69) [info] at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213) [info] at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) [info] at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) [info] at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:69) [info] at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:321) [info] at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:517) [info] at sbt.ForkMain$Run.lambda$runTest$1(ForkMain.java:414) [info] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [info] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) [info] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) [info] at java.lang.Thread.run(Thread.java:750) [info] Cause: java.lang.StackOverflowError: [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at 
org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) [info] at org.apache.spark.sql.connect.client.WrappedCloseableIterator.hasNext(CloseableIterator.scala:36) ... [info] - client releases responses directly after consuming them (236 milliseconds) [info] - server releases responses automatically when client moves ahead (336 milliseconds) [info] - big query (863 milliseconds) [info] - big query and slow client (7 seconds, 14 milliseconds) [info] - big query with frequent reattach (735 milliseconds) [info] - big query with frequent reattach and slow client (7 seconds, 606 milliseconds) [info] - long sleeping query (10 seconds, 156 milliseconds) [info] Run completed in 34 seconds, 522 milliseconds. 
[info] Total number of tests run: 13 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 12, failed 1, canceled 0, ignored 0, pending 0 [info] *** 1 TEST FAILED *** [error] Failed tests: [error] org.apache.spark.sql.connect.execution.ReattachableExecuteSuite ``` **After** ``` [info] ReattachableExecuteSuite: [info] - reattach after initial RPC ends (2 seconds, 134 milliseconds) [info] - raw interrupted RPC results in INVALID_CURSOR.DISCONNECTED error (26 milliseconds) [info] - raw new RPC interrupts previous RPC with INVALID_CURSOR.DISCONNECTED error (19 milliseconds) [info] - client INVALID_CURSOR.DISCONNECTED error is retried when rpc sender gets interrupted (328 milliseconds) [info] - client INVALID_CURSOR.DISCONNECTED error is retried when other RPC preempts this one (562 milliseconds) [info] - abandoned query gets INVALID_HANDLE.OPERATION_ABANDONED error (46 milliseconds) [info] - client releases responses directly after consuming them (231 milliseconds) [info] - server releases responses automatically when client moves ahead (359 milliseconds) [info] - big query (978 milliseconds) [info] - big query and slow client (7 seconds, 50 milliseconds) [info] - big query with frequent reattach (703 milliseconds) [info] - big query with frequent reattach and slow client (7 seconds, 626 milliseconds) [info] - long sleeping query (10 seconds, 141 milliseconds) [info] Run completed in 33 seconds, 844 milliseconds. [info] Total number of tests run: 13 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 13, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. ``` ### Was this patch authored or co-authored using generative AI tooling? No Closes #42981 from LuciferYang/CloseableIterator-apply. Authored-by: yangjie01 Signed-off-by: yangjie01 (cherry picked from commit eec090755aa5b7e6048fc004264a8f5d3591df1a) Signed-off-by: yangjie01 --- .../apache/spark/sql/connect/client/CloseableIterator.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala index d3fc9963edc7a..810158b2ac8b3 100644 --- a/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala +++ b/connector/connect/common/src/main/scala/org/apache/spark/sql/connect/client/CloseableIterator.scala @@ -48,9 +48,9 @@ private[sql] object CloseableIterator { */ def apply[T](iterator: Iterator[T]): CloseableIterator[T] = iterator match { case closeable: CloseableIterator[T] => closeable - case _ => + case iter => new WrappedCloseableIterator[T] { - override def innerIterator = iterator + override def innerIterator: Iterator[T] = iter } } } From b2aead9f98d900d139cff41d53f79a37e1e09e81 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Fri, 15 Sep 2023 19:06:58 -0700 Subject: [PATCH 022/521] [SPARK-44872][CONNECT][FOLLOWUP] Deflake ReattachableExecuteSuite and increase retry buffer ### What changes were proposed in this pull request? Deflake tests in ReattachableExecuteSuite and increase CONNECT_EXECUTE_REATTACHABLE_OBSERVER_RETRY_BUFFER_SIZE. ### Why are the changes needed? Two tests could be flaky with errors `INVALID_CURSOR.POSITION_NOT_AVAILABLE`. This is caused when a server releases the response when it falls more than CONNECT_EXECUTE_REATTACHABLE_OBSERVER_RETRY_BUFFER_SIZE behind the latest response it sent. 
However, because of HTTP2 flow control, the responses could still be in transit. In the test suite, we were explicitly disconnecting the iterators and later reconnect... In some cases they could not reconnect, because the response they last seen have fallen too fare behind. This not only changes the suite, but also adjust the default config. This potentially makes the reconnecting more robust. In normal situation, it should not lead to increased memory pressure, because the clients also release the responses using ReleaseExecute as soon as they are received. Normally, buffered responses should be freed by ReleaseExecute and this retry buffer is only a fallback mechanism. Therefore, it is safe to increase the default. In practice, this would only have effect in cases where there are actual network errors, and the increased buffer size should make the reconnects more robust in these cases. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? ReattachableExecuteSuite. Did more manual experiments of how far the response sent by client can be behind the response sent by server (because of HTTP2 flow control window) ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42908 from juliuszsompolski/SPARK-44872-followup. Authored-by: Juliusz Sompolski Signed-off-by: Dongjoon Hyun --- .../spark/sql/connect/config/Connect.scala | 2 +- .../sql/connect/SparkConnectServerTest.scala | 2 +- .../execution/ReattachableExecuteSuite.scala | 26 ++++++++++++------- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala index 7b8b05ce11a82..253ac38f9cf9e 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/config/Connect.scala @@ -133,7 +133,7 @@ object Connect { "With any value greater than 0, the last sent response will always be buffered.") .version("3.5.0") .bytesConf(ByteUnit.BYTE) - .createWithDefaultString("1m") + .createWithDefaultString("10m") val CONNECT_EXTENSIONS_RELATION_CLASSES = buildStaticConf("spark.connect.extensions.relation.classes") diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala index 488858d33ea12..eddd1c6be72b1 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.test.SharedSparkSession * Base class and utilities for a test suite that starts and tests the real SparkConnectService * with a real SparkConnectClient, communicating over RPC, but both in-process. 
*/ -class SparkConnectServerTest extends SharedSparkSession { +trait SparkConnectServerTest extends SharedSparkSession { // Server port val serverPort: Int = diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala index 169b15582b698..0e29a07b719af 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala @@ -22,7 +22,7 @@ import io.grpc.StatusRuntimeException import org.scalatest.concurrent.Eventually import org.scalatest.time.SpanSugar._ -import org.apache.spark.SparkException +import org.apache.spark.{SparkEnv, SparkException} import org.apache.spark.sql.connect.SparkConnectServerTest import org.apache.spark.sql.connect.config.Connect import org.apache.spark.sql.connect.service.SparkConnectService @@ -32,7 +32,7 @@ class ReattachableExecuteSuite extends SparkConnectServerTest { // Tests assume that this query will result in at least a couple ExecutePlanResponses on the // stream. If this is no longer the case because of changes in how much is returned in a single // ExecutePlanResponse, it may need to be adjusted. - val MEDIUM_RESULTS_QUERY = "select * from range(1000000)" + val MEDIUM_RESULTS_QUERY = "select * from range(10000000)" test("reattach after initial RPC ends") { withClient { client => @@ -138,13 +138,12 @@ class ReattachableExecuteSuite extends SparkConnectServerTest { val reattachIter = stub.reattachExecute( buildReattachExecuteRequest(operationId, Some(response.getResponseId))) assert(reattachIter.hasNext) - reattachIter.next() - - // Nevertheless, the original iterator will handle the INVALID_CURSOR.DISCONNECTED error - iter.next() - // iterator changed because it had to reconnect - assert(reattachableIter.innerIterator ne initialInnerIter) } + + // Nevertheless, the original iterator will handle the INVALID_CURSOR.DISCONNECTED error + iter.next() + // iterator changed because it had to reconnect + assert(reattachableIter.innerIterator ne initialInnerIter) } } @@ -246,19 +245,26 @@ class ReattachableExecuteSuite extends SparkConnectServerTest { val iter = stub.executePlan( buildExecutePlanRequest(buildPlan(MEDIUM_RESULTS_QUERY), operationId = operationId)) var lastSeenResponse: String = null + val serverRetryBuffer = SparkEnv.get.conf + .get(Connect.CONNECT_EXECUTE_REATTACHABLE_OBSERVER_RETRY_BUFFER_SIZE) + .toLong iter.hasNext // open iterator val execution = getExecutionHolder // after consuming enough from the iterator, server should automatically start releasing var lastSeenIndex = 0 - while (iter.hasNext && execution.responseObserver.releasedUntilIndex == 0) { + var totalSizeSeen = 0 + while (iter.hasNext && totalSizeSeen <= 1.1 * serverRetryBuffer) { val r = iter.next() lastSeenResponse = r.getResponseId() + totalSizeSeen += r.getSerializedSize lastSeenIndex += 1 } assert(iter.hasNext) - assert(execution.responseObserver.releasedUntilIndex > 0) + Eventually.eventually(timeout(eventuallyTimeout)) { + assert(execution.responseObserver.releasedUntilIndex > 0) + } // Reattach from the beginning is not available. 
val reattach = stub.reattachExecute(buildReattachExecuteRequest(operationId, None)) From 71d5b110660f028bf3b097e6b5805fe68126b4cd Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Tue, 19 Sep 2023 22:56:44 -0700 Subject: [PATCH 023/521] [SPARK-45189][3.5][SQL] Creating UnresolvedRelation from TableIdentifier should include the catalog field ### What changes were proposed in this pull request? Creating UnresolvedRelation from TableIdentifier should include the catalog field ### Why are the changes needed? Fix a issue in a utility method for UnresolvedRelation ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New unit test ### Was this patch authored or co-authored using generative AI tooling? No Closes #42998 from gengliangwang/backportTableId. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- .../spark/sql/catalyst/analysis/unresolved.scala | 5 ++--- .../spark/sql/catalyst/parser/PlanParserSuite.scala | 12 ++++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index 1c72ec0d69980..b1dcb465b4778 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -104,12 +104,11 @@ object UnresolvedRelation { tableIdentifier: TableIdentifier, extraOptions: CaseInsensitiveStringMap, isStreaming: Boolean): UnresolvedRelation = { - UnresolvedRelation( - tableIdentifier.database.toSeq :+ tableIdentifier.table, extraOptions, isStreaming) + UnresolvedRelation(tableIdentifier.nameParts, extraOptions, isStreaming) } def apply(tableIdentifier: TableIdentifier): UnresolvedRelation = - UnresolvedRelation(tableIdentifier.database.toSeq :+ tableIdentifier.table) + UnresolvedRelation(tableIdentifier.nameParts) } /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 4a5d0a0ae29fa..13474fe29de98 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{Decimal, DecimalType, IntegerType, LongType, StringType} +import org.apache.spark.sql.util.CaseInsensitiveStringMap /** * Parser test cases for rules defined in [[CatalystSqlParser]] / [[AstBuilder]]. 
@@ -1758,4 +1759,15 @@ class PlanParserSuite extends AnalysisTest { parsePlan("SELECT * FROM a LIMIT ?"), table("a").select(star()).limit(PosParameter(22))) } + + test("SPARK-45189: Creating UnresolvedRelation from TableIdentifier should include the" + + " catalog field") { + val tableId = TableIdentifier("t", Some("db"), Some("cat")) + val unresolvedRelation = UnresolvedRelation(tableId) + assert(unresolvedRelation.multipartIdentifier == Seq("cat", "db", "t")) + val unresolvedRelation2 = UnresolvedRelation(tableId, CaseInsensitiveStringMap.empty, true) + assert(unresolvedRelation2.multipartIdentifier == Seq("cat", "db", "t")) + assert(unresolvedRelation2.options == CaseInsensitiveStringMap.empty) + assert(unresolvedRelation2.isStreaming) + } } From 6ab870f6a9915da2d9f231586b9b85b8faf94e2e Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Wed, 20 Sep 2023 17:45:22 +0800 Subject: [PATCH 024/521] [SPARK-43979][SQL][FOLLOWUP] Handle non alias-only project case ### What changes were proposed in this pull request? `simplifyPlanForCollectedMetrics ` still could need to handle non alias-only project case where the project contains a mixed of aliases and attributes. In such case `simplifyPlanForCollectedMetrics` should also drop the extra project for the plan check when it contains CollectedMetrics. ### Why are the changes needed? Improve `simplifyPlanForCollectedMetrics` so it handles more plan pattern. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT ### Was this patch authored or co-authored using generative AI tooling? NO Closes #42971 from amaliujia/improve_simplification. Authored-by: Rui Wang Signed-off-by: Wenchen Fan (cherry picked from commit d92d6f60342ca4d005cc2c1db94dc3b107f5d89b) Signed-off-by: Wenchen Fan --- .../sql/catalyst/analysis/CheckAnalysis.scala | 4 +++- .../sql/catalyst/analysis/AnalysisSuite.scala | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 43546bcaa421a..139fa34a1dfcf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -1109,7 +1109,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB * remove extra project which only re-assign expr ids from the plan so that we can identify exact * duplicates metric definition. */ - private def simplifyPlanForCollectedMetrics(plan: LogicalPlan): LogicalPlan = { + def simplifyPlanForCollectedMetrics(plan: LogicalPlan): LogicalPlan = { plan.resolveOperators { case p: Project if p.projectList.size == p.child.output.size => val assignExprIdOnly = p.projectList.zipWithIndex.forall { @@ -1118,6 +1118,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB // ordinal of this attribute in the child outputs. So an alias-only Project means the // the id of the aliased attribute is the same as its index in the project list. 
attr.exprId.id == index + case (left: AttributeReference, index) => + left.exprId.id == index case _ => false } if (assignExprIdOnly) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 06c3e3eb0405a..57b37e67b32b8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1667,4 +1667,18 @@ class AnalysisSuite extends AnalysisTest with Matchers { checkAnalysis(ident2.select($"a"), testRelation.select($"a").analyze) } } + + test("simplifyPlanForCollectedMetrics should handle non alias-only project case") { + val inner = Project( + Seq( + Alias(testRelation2.output(0), "a")(), + testRelation2.output(1), + Alias(testRelation2.output(2), "c")(), + testRelation2.output(3), + testRelation2.output(4) + ), + testRelation2) + val actualPlan = getAnalyzer.simplifyPlanForCollectedMetrics(inner.canonicalized) + assert(actualPlan == testRelation2.canonicalized) + } } From 326f8297224a0c02b2db8c8cb3f4b92cdc0dafb4 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 20 Sep 2023 08:46:53 -0700 Subject: [PATCH 025/521] [SPARK-45237][DOCS] Change the default value of `spark.history.store.hybridStore.diskBackend` in `monitoring.md` to `ROCKSDB` ### What changes were proposed in this pull request? This pr change the default value of `spark.history.store.hybridStore.diskBackend` in `monitoring.md` to `ROCKSDB` ### Why are the changes needed? SPARK-42277 change to use `RocksDB` for `spark.history.store.hybridStore.diskBackend` by default, but in `monitoring.md`, the default value is still set as `LEVELDB`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions ### Was this patch authored or co-authored using generative AI tooling? No Closes #43015 from LuciferYang/SPARK-45237. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun (cherry picked from commit f1bc0f938162485a96de5788f53f9fa4fb37a3b1) Signed-off-by: Dongjoon Hyun --- docs/monitoring.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/monitoring.md b/docs/monitoring.md index ebd8781fd0071..91b158bf85d26 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -414,7 +414,7 @@ Security options for the Spark History Server are covered more detail in the spark.history.store.hybridStore.diskBackend - LEVELDB + ROCKSDB Specifies a disk-based store used in hybrid store; LEVELDB or ROCKSDB. From 5af0819654aca896d73c16875b07b2143cb1132c Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Fri, 22 Sep 2023 11:07:25 +0800 Subject: [PATCH 026/521] [SPARK-41086][SQL] Use DataFrame ID to semantically validate CollectMetrics ### What changes were proposed in this pull request? In existing code, plan matching is used to validate if two CollectMetrics have the same name but different semantic. However, plan matching approach is fragile. A better way to tackle this is to just utilize the unique DataFrame Id. This is because observe API is only supported by DataFrame API. SQL does not have such syntax. So two CollectMetric are semantic the same if and only if they have same name and same DataFrame id. ### Why are the changes needed? This is to use a more stable approach to replace a fragile approach. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? 
UT ### Was this patch authored or co-authored using generative AI tooling? NO Closes #43010 from amaliujia/another_approch_for_collect_metrics. Authored-by: Rui Wang Signed-off-by: Wenchen Fan (cherry picked from commit 7c3c7c5a4bd94c9e05b5e680a5242c2485875633) Signed-off-by: Wenchen Fan --- .../connect/planner/SparkConnectPlanner.scala | 6 +- python/pyspark/sql/connect/plan.py | 1 + .../sql/catalyst/analysis/Analyzer.scala | 4 +- .../sql/catalyst/analysis/CheckAnalysis.scala | 36 ++---------- .../plans/logical/basicLogicalOperators.scala | 3 +- .../sql/catalyst/analysis/AnalysisSuite.scala | 55 +++++++------------ .../scala/org/apache/spark/sql/Dataset.scala | 2 +- .../spark/sql/execution/SparkStrategies.scala | 2 +- 8 files changed, 35 insertions(+), 74 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 641dfc5dcd3c8..50a55f5e6411d 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -164,7 +164,7 @@ class SparkConnectPlanner(val sessionHolder: SessionHolder) extends Logging { case proto.Relation.RelTypeCase.CACHED_REMOTE_RELATION => transformCachedRemoteRelation(rel.getCachedRemoteRelation) case proto.Relation.RelTypeCase.COLLECT_METRICS => - transformCollectMetrics(rel.getCollectMetrics) + transformCollectMetrics(rel.getCollectMetrics, rel.getCommon.getPlanId) case proto.Relation.RelTypeCase.PARSE => transformParse(rel.getParse) case proto.Relation.RelTypeCase.RELTYPE_NOT_SET => throw new IndexOutOfBoundsException("Expected Relation to be set, but is empty.") @@ -1054,12 +1054,12 @@ class SparkConnectPlanner(val sessionHolder: SessionHolder) extends Logging { numPartitionsOpt) } - private def transformCollectMetrics(rel: proto.CollectMetrics): LogicalPlan = { + private def transformCollectMetrics(rel: proto.CollectMetrics, planId: Long): LogicalPlan = { val metrics = rel.getMetricsList.asScala.toSeq.map { expr => Column(transformExpression(expr)) } - CollectMetrics(rel.getName, metrics.map(_.named), transformRelation(rel.getInput)) + CollectMetrics(rel.getName, metrics.map(_.named), transformRelation(rel.getInput), planId) } private def transformDeduplicate(rel: proto.Deduplicate): LogicalPlan = { diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py index 196b1f119ba41..b7ea1f9499354 100644 --- a/python/pyspark/sql/connect/plan.py +++ b/python/pyspark/sql/connect/plan.py @@ -1196,6 +1196,7 @@ def plan(self, session: "SparkConnectClient") -> proto.Relation: assert self._child is not None plan = proto.Relation() + plan.common.plan_id = self._child._plan_id plan.collect_metrics.input.CopyFrom(self._child.plan(session)) plan.collect_metrics.name = self._name plan.collect_metrics.metrics.extend([self.col_to_expr(x, session) for x in self._exprs]) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 6c5d19f58ac25..8e3c9b30c61bf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3796,9 +3796,9 @@ object CleanupAliases extends 
Rule[LogicalPlan] with AliasHelper { Window(cleanedWindowExprs, partitionSpec.map(trimAliases), orderSpec.map(trimAliases(_).asInstanceOf[SortOrder]), child) - case CollectMetrics(name, metrics, child) => + case CollectMetrics(name, metrics, child, dataframeId) => val cleanedMetrics = metrics.map(trimNonTopLevelAliases) - CollectMetrics(name, cleanedMetrics, child) + CollectMetrics(name, cleanedMetrics, child, dataframeId) case Unpivot(ids, values, aliases, variableColumnName, valueColumnNames, child) => val cleanedIds = ids.map(_.map(trimNonTopLevelAliases)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 139fa34a1dfcf..511f3622e7e35 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -484,7 +484,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB groupingExprs.foreach(checkValidGroupingExprs) aggregateExprs.foreach(checkValidAggregateExpression) - case CollectMetrics(name, metrics, _) => + case CollectMetrics(name, metrics, _, _) => if (name == null || name.isEmpty) { operator.failAnalysis( errorClass = "INVALID_OBSERVED_METRICS.MISSING_NAME", @@ -1075,17 +1075,15 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB * are allowed (e.g. self-joins). */ private def checkCollectedMetrics(plan: LogicalPlan): Unit = { - val metricsMap = mutable.Map.empty[String, LogicalPlan] + val metricsMap = mutable.Map.empty[String, CollectMetrics] def check(plan: LogicalPlan): Unit = plan.foreach { node => node match { - case metrics @ CollectMetrics(name, _, _) => - val simplifiedMetrics = simplifyPlanForCollectedMetrics(metrics.canonicalized) + case metrics @ CollectMetrics(name, _, _, dataframeId) => metricsMap.get(name) match { case Some(other) => - val simplifiedOther = simplifyPlanForCollectedMetrics(other.canonicalized) // Exact duplicates are allowed. They can be the result // of a CTE that is used multiple times or a self join. - if (simplifiedMetrics != simplifiedOther) { + if (dataframeId != other.dataframeId) { failAnalysis( errorClass = "DUPLICATED_METRICS_NAME", messageParameters = Map("metricName" -> name)) @@ -1104,32 +1102,6 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB check(plan) } - /** - * This method is only used for checking collected metrics. This method tries to - * remove extra project which only re-assign expr ids from the plan so that we can identify exact - * duplicates metric definition. - */ - def simplifyPlanForCollectedMetrics(plan: LogicalPlan): LogicalPlan = { - plan.resolveOperators { - case p: Project if p.projectList.size == p.child.output.size => - val assignExprIdOnly = p.projectList.zipWithIndex.forall { - case (Alias(attr: AttributeReference, _), index) => - // The input plan of this method is already canonicalized. The attribute id becomes the - // ordinal of this attribute in the child outputs. So an alias-only Project means the - // the id of the aliased attribute is the same as its index in the project list. 
- attr.exprId.id == index - case (left: AttributeReference, index) => - left.exprId.id == index - case _ => false - } - if (assignExprIdOnly) { - p.child - } else { - p - } - } - } - /** * Validates to make sure the outer references appearing inside the subquery * are allowed. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 4bb830662a33f..96b67fc52e0d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1952,7 +1952,8 @@ trait SupportsSubquery extends LogicalPlan case class CollectMetrics( name: String, metrics: Seq[NamedExpression], - child: LogicalPlan) + child: LogicalPlan, + dataframeId: Long) extends UnaryNode { override lazy val resolved: Boolean = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 57b37e67b32b8..802b6d471a65c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -771,34 +771,35 @@ class AnalysisSuite extends AnalysisTest with Matchers { val literal = Literal(1).as("lit") // Ok - assert(CollectMetrics("event", literal :: sum :: random_sum :: Nil, testRelation).resolved) + assert(CollectMetrics("event", literal :: sum :: random_sum :: Nil, testRelation, 0).resolved) // Bad name - assert(!CollectMetrics("", sum :: Nil, testRelation).resolved) + assert(!CollectMetrics("", sum :: Nil, testRelation, 0).resolved) assertAnalysisErrorClass( - CollectMetrics("", sum :: Nil, testRelation), + CollectMetrics("", sum :: Nil, testRelation, 0), expectedErrorClass = "INVALID_OBSERVED_METRICS.MISSING_NAME", expectedMessageParameters = Map( - "operator" -> "'CollectMetrics , [sum(a#x) AS sum#xL]\n+- LocalRelation , [a#x]\n") + "operator" -> + "'CollectMetrics , [sum(a#x) AS sum#xL], 0\n+- LocalRelation , [a#x]\n") ) // No columns - assert(!CollectMetrics("evt", Nil, testRelation).resolved) + assert(!CollectMetrics("evt", Nil, testRelation, 0).resolved) def checkAnalysisError(exprs: Seq[NamedExpression], errors: String*): Unit = { - assertAnalysisError(CollectMetrics("event", exprs, testRelation), errors) + assertAnalysisError(CollectMetrics("event", exprs, testRelation, 0), errors) } // Unwrapped attribute assertAnalysisErrorClass( - CollectMetrics("event", a :: Nil, testRelation), + CollectMetrics("event", a :: Nil, testRelation, 0), expectedErrorClass = "INVALID_OBSERVED_METRICS.NON_AGGREGATE_FUNC_ARG_IS_ATTRIBUTE", expectedMessageParameters = Map("expr" -> "\"a\"") ) // Unwrapped non-deterministic expression assertAnalysisErrorClass( - CollectMetrics("event", Rand(10).as("rnd") :: Nil, testRelation), + CollectMetrics("event", Rand(10).as("rnd") :: Nil, testRelation, 0), expectedErrorClass = "INVALID_OBSERVED_METRICS.NON_AGGREGATE_FUNC_ARG_IS_NON_DETERMINISTIC", expectedMessageParameters = Map("expr" -> "\"rand(10) AS rnd\"") ) @@ -808,7 +809,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { CollectMetrics( "event", Sum(a).toAggregateExpression(isDistinct = true).as("sum") :: Nil, - testRelation), + testRelation, 0), expectedErrorClass = 
"INVALID_OBSERVED_METRICS.AGGREGATE_EXPRESSION_WITH_DISTINCT_UNSUPPORTED", expectedMessageParameters = Map("expr" -> "\"sum(DISTINCT a) AS sum\"") @@ -819,7 +820,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { CollectMetrics( "event", Sum(Sum(a).toAggregateExpression()).toAggregateExpression().as("sum") :: Nil, - testRelation), + testRelation, 0), expectedErrorClass = "INVALID_OBSERVED_METRICS.NESTED_AGGREGATES_UNSUPPORTED", expectedMessageParameters = Map("expr" -> "\"sum(sum(a)) AS sum\"") ) @@ -830,7 +831,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { WindowSpecDefinition(Nil, a.asc :: Nil, SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))) assertAnalysisErrorClass( - CollectMetrics("event", windowExpr.as("rn") :: Nil, testRelation), + CollectMetrics("event", windowExpr.as("rn") :: Nil, testRelation, 0), expectedErrorClass = "INVALID_OBSERVED_METRICS.WINDOW_EXPRESSIONS_UNSUPPORTED", expectedMessageParameters = Map( "expr" -> @@ -848,14 +849,14 @@ class AnalysisSuite extends AnalysisTest with Matchers { // Same result - duplicate names are allowed assertAnalysisSuccess(Union( - CollectMetrics("evt1", count :: Nil, testRelation) :: - CollectMetrics("evt1", count :: Nil, testRelation) :: Nil)) + CollectMetrics("evt1", count :: Nil, testRelation, 0) :: + CollectMetrics("evt1", count :: Nil, testRelation, 0) :: Nil)) // Same children, structurally different metrics - fail assertAnalysisErrorClass( Union( - CollectMetrics("evt1", count :: Nil, testRelation) :: - CollectMetrics("evt1", sum :: Nil, testRelation) :: Nil), + CollectMetrics("evt1", count :: Nil, testRelation, 0) :: + CollectMetrics("evt1", sum :: Nil, testRelation, 1) :: Nil), expectedErrorClass = "DUPLICATED_METRICS_NAME", expectedMessageParameters = Map("metricName" -> "evt1") ) @@ -865,17 +866,17 @@ class AnalysisSuite extends AnalysisTest with Matchers { val tblB = LocalRelation(b) assertAnalysisErrorClass( Union( - CollectMetrics("evt1", count :: Nil, testRelation) :: - CollectMetrics("evt1", count :: Nil, tblB) :: Nil), + CollectMetrics("evt1", count :: Nil, testRelation, 0) :: + CollectMetrics("evt1", count :: Nil, tblB, 1) :: Nil), expectedErrorClass = "DUPLICATED_METRICS_NAME", expectedMessageParameters = Map("metricName" -> "evt1") ) // Subquery different tree - fail - val subquery = Aggregate(Nil, sum :: Nil, CollectMetrics("evt1", count :: Nil, testRelation)) + val subquery = Aggregate(Nil, sum :: Nil, CollectMetrics("evt1", count :: Nil, testRelation, 0)) val query = Project( b :: ScalarSubquery(subquery, Nil).as("sum") :: Nil, - CollectMetrics("evt1", count :: Nil, tblB)) + CollectMetrics("evt1", count :: Nil, tblB, 1)) assertAnalysisErrorClass( query, expectedErrorClass = "DUPLICATED_METRICS_NAME", @@ -887,7 +888,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { case a: AggregateExpression => a.copy(filter = Some(true)) }.asInstanceOf[NamedExpression] assertAnalysisErrorClass( - CollectMetrics("evt1", sumWithFilter :: Nil, testRelation), + CollectMetrics("evt1", sumWithFilter :: Nil, testRelation, 0), expectedErrorClass = "INVALID_OBSERVED_METRICS.AGGREGATE_EXPRESSION_WITH_FILTER_UNSUPPORTED", expectedMessageParameters = Map("expr" -> "\"sum(a) FILTER (WHERE true) AS sum\"") @@ -1667,18 +1668,4 @@ class AnalysisSuite extends AnalysisTest with Matchers { checkAnalysis(ident2.select($"a"), testRelation.select($"a").analyze) } } - - test("simplifyPlanForCollectedMetrics should handle non alias-only project case") { - val inner = Project( - Seq( - 
Alias(testRelation2.output(0), "a")(), - testRelation2.output(1), - Alias(testRelation2.output(2), "c")(), - testRelation2.output(3), - testRelation2.output(4) - ), - testRelation2) - val actualPlan = getAnalyzer.simplifyPlanForCollectedMetrics(inner.canonicalized) - assert(actualPlan == testRelation2.canonicalized) - } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index fd8421fa096cc..e047b927b9057 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2189,7 +2189,7 @@ class Dataset[T] private[sql]( */ @varargs def observe(name: String, expr: Column, exprs: Column*): Dataset[T] = withTypedPlan { - CollectMetrics(name, (expr +: exprs).map(_.named), logicalPlan) + CollectMetrics(name, (expr +: exprs).map(_.named), logicalPlan, id) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 903565a6d591b..d851eacd5ab92 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -935,7 +935,7 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { throw QueryExecutionErrors.ddlUnsupportedTemporarilyError("UPDATE TABLE") case _: MergeIntoTable => throw QueryExecutionErrors.ddlUnsupportedTemporarilyError("MERGE INTO TABLE") - case logical.CollectMetrics(name, metrics, child) => + case logical.CollectMetrics(name, metrics, child, _) => execution.CollectMetricsExec(name, metrics, planLater(child)) :: Nil case WriteFiles(child, fileFormat, partitionColumns, bucket, options, staticPartitions) => WriteFilesExec(planLater(child), fileFormat, partitionColumns, bucket, options, From 609306ff5daa8ff7c2212088d33c0911ad0f4989 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sun, 24 Sep 2023 14:17:55 -0500 Subject: [PATCH 027/521] [SPARK-45286][DOCS] Add back Matomo analytics ### What changes were proposed in this pull request? Add analytics to doc pages using the ASF's Matomo service ### Why are the changes needed? We had previously removed Google Analytics from the website and release docs, per ASF policy: https://github.com/apache/spark/pull/36310 We just restored analytics using the ASF-hosted Matomo service on the website: https://github.com/apache/spark-website/commit/a1548627b48a62c2e51870d1488ca3e09397bd30 This change would put the same new tracking code back into the release docs. It would let us see what docs and resources are most used, I suppose. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A ### Was this patch authored or co-authored using generative AI tooling? No Closes #43063 from srowen/SPARK-45286. Authored-by: Sean Owen Signed-off-by: Sean Owen (cherry picked from commit a881438114ea3e8e918d981ef89ed1ab956d6fca) Signed-off-by: Sean Owen --- docs/_layouts/global.html | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index 9b7c469246165..8c4435fdf31d9 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -32,6 +32,25 @@ + {% production %} + + + + {% endproduction %} + Shuffle 2 ----> ResultStage - ShuffleMapStage1 is complete - A task from ShuffleMapStage2 fails with FetchFailed. 
Other tasks are still running - Both ShuffleMapStage1 and ShuffleMapStage2 are retried - ShuffleMapStage1 is retried and completes - ShuffleMapStage2 reattempt is scheduled for execution - Before all tasks of ShuffleMapStage2 reattempt could finish, one/more laggard tasks from the original attempt of ShuffleMapStage2 finish and ShuffleMapStage2 also gets marked as complete - Result Stage gets scheduled and finishes After this change, such laggard tasks from the old attempt of the indeterminate stage will be ignored ### Why are the changes needed? This can give wrong result when indeterminate stages needs to be retried under the circumstances mentioned above ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A new test case ### Was this patch authored or co-authored using generative AI tooling? No Closes #42950 from mayurdb/rollbackFix. Authored-by: mayurb Signed-off-by: Wenchen Fan (cherry picked from commit 7ffc0b71aa3e416a9b21e0975a169b2a8a8403a8) Signed-off-by: Wenchen Fan --- .../apache/spark/scheduler/DAGScheduler.scala | 29 +++-- .../spark/scheduler/DAGSchedulerSuite.scala | 104 ++++++++++++++++++ 2 files changed, 122 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index fc83439454dcf..d73bb6339015b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -1903,19 +1903,26 @@ private[spark] class DAGScheduler( case smt: ShuffleMapTask => val shuffleStage = stage.asInstanceOf[ShuffleMapStage] - shuffleStage.pendingPartitions -= task.partitionId - val status = event.result.asInstanceOf[MapStatus] - val execId = status.location.executorId - logDebug("ShuffleMapTask finished on " + execId) - if (executorFailureEpoch.contains(execId) && + // Ignore task completion for old attempt of indeterminate stage + val ignoreIndeterminate = stage.isIndeterminate && + task.stageAttemptId < stage.latestInfo.attemptNumber() + if (!ignoreIndeterminate) { + shuffleStage.pendingPartitions -= task.partitionId + val status = event.result.asInstanceOf[MapStatus] + val execId = status.location.executorId + logDebug("ShuffleMapTask finished on " + execId) + if (executorFailureEpoch.contains(execId) && smt.epoch <= executorFailureEpoch(execId)) { - logInfo(s"Ignoring possibly bogus $smt completion from executor $execId") + logInfo(s"Ignoring possibly bogus $smt completion from executor $execId") + } else { + // The epoch of the task is acceptable (i.e., the task was launched after the most + // recent failure we're aware of for the executor), so mark the task's output as + // available. + mapOutputTracker.registerMapOutput( + shuffleStage.shuffleDep.shuffleId, smt.partitionId, status) + } } else { - // The epoch of the task is acceptable (i.e., the task was launched after the most - // recent failure we're aware of for the executor), so mark the task's output as - // available. 
- mapOutputTracker.registerMapOutput( - shuffleStage.shuffleDep.shuffleId, smt.partitionId, status) + logInfo(s"Ignoring $smt completion from an older attempt of indeterminate stage") } if (runningStages.contains(shuffleStage) && shuffleStage.pendingPartitions.isEmpty) { diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index c7e4994e328f4..e351f8b95bbb0 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -3041,6 +3041,27 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti (shuffleId1, shuffleId2) } + private def constructTwoIndeterminateStage(): (Int, Int) = { + val shuffleMapRdd1 = new MyRDD(sc, 2, Nil, indeterminate = true) + + val shuffleDep1 = new ShuffleDependency(shuffleMapRdd1, new HashPartitioner(2)) + val shuffleId1 = shuffleDep1.shuffleId + val shuffleMapRdd2 = new MyRDD(sc, 2, List(shuffleDep1), tracker = mapOutputTracker, + indeterminate = true) + + val shuffleDep2 = new ShuffleDependency(shuffleMapRdd2, new HashPartitioner(2)) + val shuffleId2 = shuffleDep2.shuffleId + val finalRdd = new MyRDD(sc, 2, List(shuffleDep2), tracker = mapOutputTracker) + + submit(finalRdd, Array(0, 1)) + + // Finish the first shuffle map stage. + completeShuffleMapStageSuccessfully(0, 0, 2) + assert(mapOutputTracker.findMissingPartitions(shuffleId1) === Some(Seq.empty)) + + (shuffleId1, shuffleId2) + } + test("SPARK-25341: abort stage while using old fetch protocol") { conf.set(config.SHUFFLE_USE_OLD_FETCH_PROTOCOL.key, "true") // Construct the scenario of indeterminate stage fetch failed. @@ -3099,6 +3120,89 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti assertDataStructuresEmpty() } + test("SPARK-45182: Ignore task completion from old stage after retrying indeterminate stages") { + val (shuffleId1, shuffleId2) = constructTwoIndeterminateStage() + + // shuffleMapStage0 -> shuffleId1 -> shuffleMapStage1 -> shuffleId2 -> resultStage + val shuffleMapStage1 = scheduler.stageIdToStage(1).asInstanceOf[ShuffleMapStage] + val resultStage = scheduler.stageIdToStage(2).asInstanceOf[ResultStage] + + // Shuffle map stage 0 is done + assert(mapOutputTracker.findMissingPartitions(shuffleId1) == Some(Seq.empty)) + // Shuffle map stage 1 is still waiting for its 2 tasks to complete + assert(mapOutputTracker.findMissingPartitions(shuffleId2) == Some(Seq(0, 1))) + // The result stage is still waiting for its 2 tasks to complete + assert(resultStage.findMissingPartitions() == Seq(0, 1)) + + scheduler.resubmitFailedStages() + + // The first task of the shuffle map stage 1 fails with fetch failure + runEvent(makeCompletionEvent( + taskSets(1).tasks(0), + FetchFailed(makeBlockManagerId("hostA"), shuffleId1, 0L, 0, 0, "ignored"), + null)) + + // Both the stages should have been resubmitted + val newFailedStages = scheduler.failedStages.toSeq + assert(newFailedStages.map(_.id) == Seq(0, 1)) + + scheduler.resubmitFailedStages() + + // Since shuffleId1 is indeterminate, all tasks of shuffle map stage 0 should be ran + assert(taskSets(2).stageId == 0) + assert(taskSets(2).stageAttemptId == 1) + assert(taskSets(2).tasks.length == 2) + + // Complete the re-attempt of shuffle map stage 0 + completeShuffleMapStageSuccessfully(0, 1, 2) + assert(mapOutputTracker.findMissingPartitions(shuffleId1) === Some(Seq.empty)) + + // Since shuffleId2 is 
indeterminate, all tasks of shuffle map stage 1 should be ran + assert(taskSets(3).stageId == 1) + assert(taskSets(3).stageAttemptId == 1) + assert(taskSets(3).tasks.length == 2) + + // The first task of the shuffle map stage 1 from 2nd attempt succeeds + runEvent(makeCompletionEvent( + taskSets(3).tasks(0), + Success, + makeMapStatus("hostB", + 2))) + + // The second task of the shuffle map stage 1 from 1st attempt succeeds + runEvent(makeCompletionEvent( + taskSets(1).tasks(1), + Success, + makeMapStatus("hostC", + 2))) + + // This task completion should get ignored and partition 1 should be missing + // for shuffle map stage 1 + assert(mapOutputTracker.findMissingPartitions(shuffleId2) == Some(Seq(1))) + + // The second task of the shuffle map stage 1 from 2nd attempt succeeds + runEvent(makeCompletionEvent( + taskSets(3).tasks(1), + Success, + makeMapStatus("hostD", + 2))) + + // The shuffle map stage 1 should be done + assert(mapOutputTracker.findMissingPartitions(shuffleId2) === Some(Seq.empty)) + + // The shuffle map outputs for shuffleId1 should be from latest attempt of shuffle map stage 1 + assert(mapOutputTracker.getMapLocation(shuffleMapStage1.shuffleDep, 0, 2) + === Seq("hostB", "hostD")) + + // Complete result stage + complete(taskSets(4), Seq((Success, 11), (Success, 12))) + + // Job successfully ended + assert(results === Map(0 -> 11, 1 -> 12)) + results.clear() + assertDataStructuresEmpty() + } + test("SPARK-25341: continuous indeterminate stage roll back") { // shuffleMapRdd1/2/3 are all indeterminate. val shuffleMapRdd1 = new MyRDD(sc, 2, Nil, indeterminate = true) From eb6b68daa4ef237d92575f799bafd97b1c3615b5 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 27 Sep 2023 16:00:11 +0800 Subject: [PATCH 031/521] [SPARK-45346][SQL] Parquet schema inference should respect case sensitive flag when merging schema ### What changes were proposed in this pull request? Currently, when we infer schema from parquet files and try to merge the schema, it's always case-sensitive. Then a check fails later which tries to make sure the data schema of parquet fields does not have duplicated columns, in a case-insensitive way (the default). This PR fixes the problem and make the schema merging respect the case sensitivity flag. ### Why are the changes needed? bug fix ### Does this PR introduce _any_ user-facing change? yes, spark can read some parquet files now. ### How was this patch tested? new tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #43134 from cloud-fan/merge-schema. 
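For illustration, a hedged sketch of the user-visible behaviour this change enables (not part of the patch; an existing `spark` session and a scratch directory `path` are assumed), mirroring the new `ParquetSchemaSuite` test: two Parquet files whose column names differ only by case can now be merged, with the outcome governed by `spark.sql.caseSensitive`.

```scala
// Sketch only: schema merging across files containing columns "col" and "COL".
import spark.implicits._
Seq(1).toDF("col").write.mode("append").parquet(path)
Seq(2).toDF("COL").write.mode("append").parquet(path)

spark.conf.set("spark.sql.caseSensitive", "false")               // the default
val df = spark.read.option("mergeSchema", "true").parquet(path)
// The two fields now merge into a single column and both rows are readable;
// previously the case-insensitive duplicate-column check made this read fail.
df.show()

spark.conf.set("spark.sql.caseSensitive", "true")
// Case-sensitive mode keeps "col" and "COL" as two distinct columns.
spark.read.option("mergeSchema", "true").parquet(path).printSchema()
```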
Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit 1cbc424ae2acaf4d82f928cfea2767c81425305e) Signed-off-by: Wenchen Fan --- .../apache/spark/sql/types/StructType.scala | 28 +++++++++++++------ .../datasources/SchemaMergeUtils.scala | 5 ++-- .../parquet/ParquetSchemaSuite.scala | 21 ++++++++++++++ 3 files changed, 43 insertions(+), 11 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala index 8edc7cf370b7d..8fd7f47b34624 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.types +import java.util.Locale + import scala.collection.{mutable, Map} import scala.util.Try import scala.util.control.NonFatal @@ -476,8 +478,8 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru * 4. Otherwise, `this` and `that` are considered as conflicting schemas and an exception would be * thrown. */ - private[sql] def merge(that: StructType): StructType = - StructType.merge(this, that).asInstanceOf[StructType] + private[sql] def merge(that: StructType, caseSensitive: Boolean = true): StructType = + StructType.merge(this, that, caseSensitive).asInstanceOf[StructType] override private[spark] def asNullable: StructType = { val newFields = fields.map { @@ -561,16 +563,20 @@ object StructType extends AbstractDataType { StructType(newFields) }) - private[sql] def merge(left: DataType, right: DataType): DataType = + private[sql] def merge(left: DataType, right: DataType, caseSensitive: Boolean = true): DataType = mergeInternal(left, right, (s1: StructType, s2: StructType) => { val leftFields = s1.fields val rightFields = s2.fields val newFields = mutable.ArrayBuffer.empty[StructField] - val rightMapped = fieldsMap(rightFields) + def normalize(name: String): String = { + if (caseSensitive) name else name.toLowerCase(Locale.ROOT) + } + + val rightMapped = fieldsMap(rightFields, caseSensitive) leftFields.foreach { case leftField @ StructField(leftName, leftType, leftNullable, _) => - rightMapped.get(leftName) + rightMapped.get(normalize(leftName)) .map { case rightField @ StructField(rightName, rightType, rightNullable, _) => try { leftField.copy( @@ -588,9 +594,9 @@ object StructType extends AbstractDataType { .foreach(newFields += _) } - val leftMapped = fieldsMap(leftFields) + val leftMapped = fieldsMap(leftFields, caseSensitive) rightFields - .filterNot(f => leftMapped.get(f.name).nonEmpty) + .filterNot(f => leftMapped.contains(normalize(f.name))) .foreach { f => newFields += f } @@ -643,11 +649,15 @@ object StructType extends AbstractDataType { throw DataTypeErrors.cannotMergeIncompatibleDataTypesError(left, right) } - private[sql] def fieldsMap(fields: Array[StructField]): Map[String, StructField] = { + private[sql] def fieldsMap( + fields: Array[StructField], + caseSensitive: Boolean = true): Map[String, StructField] = { // Mimics the optimization of breakOut, not present in Scala 2.13, while working in 2.12 val map = mutable.Map[String, StructField]() map.sizeHint(fields.length) - fields.foreach(s => map.put(s.name, s)) + fields.foreach { s => + if (caseSensitive) map.put(s.name, s) else map.put(s.name.toLowerCase(Locale.ROOT), s) + } map } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala index 35d9b5d60348d..cf0e67ecc30fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SchemaMergeUtils.scala @@ -64,6 +64,7 @@ object SchemaMergeUtils extends Logging { val ignoreCorruptFiles = new FileSourceOptions(CaseInsensitiveMap(parameters)).ignoreCorruptFiles + val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis // Issues a Spark job to read Parquet/ORC schema in parallel. val partiallyMergedSchemas = @@ -84,7 +85,7 @@ object SchemaMergeUtils extends Logging { var mergedSchema = schemas.head schemas.tail.foreach { schema => try { - mergedSchema = mergedSchema.merge(schema) + mergedSchema = mergedSchema.merge(schema, caseSensitive) } catch { case cause: SparkException => throw QueryExecutionErrors.failedMergingSchemaError(mergedSchema, schema, cause) } @@ -99,7 +100,7 @@ object SchemaMergeUtils extends Logging { var finalSchema = partiallyMergedSchemas.head partiallyMergedSchemas.tail.foreach { schema => try { - finalSchema = finalSchema.merge(schema) + finalSchema = finalSchema.merge(schema, caseSensitive) } catch { case cause: SparkException => throw QueryExecutionErrors.failedMergingSchemaError(finalSchema, schema, cause) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala index 30f46a3cac2d3..facc9b90ff778 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala @@ -996,6 +996,27 @@ class ParquetSchemaSuite extends ParquetSchemaTest { } } + test("SPARK-45346: merge schema should respect case sensitivity") { + import testImplicits._ + Seq(true, false).foreach { caseSensitive => + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive.toString) { + withTempPath { path => + Seq(1).toDF("col").write.mode("append").parquet(path.getCanonicalPath) + Seq(2).toDF("COL").write.mode("append").parquet(path.getCanonicalPath) + val df = spark.read.option("mergeSchema", "true").parquet(path.getCanonicalPath) + if (caseSensitive) { + assert(df.columns.toSeq.sorted == Seq("COL", "col")) + assert(df.collect().length == 2) + } else { + // The final column name depends on which file is listed first, and is a bit random. + assert(df.columns.toSeq.map(_.toLowerCase(java.util.Locale.ROOT)) == Seq("col")) + assert(df.collect().length == 2) + } + } + } + } + } + // ======================================= // Tests for parquet schema mismatch error // ======================================= From e6e0c074636c1a43fa5957f906a881469515393a Mon Sep 17 00:00:00 2001 From: zeruibao Date: Wed, 27 Sep 2023 16:42:35 +0800 Subject: [PATCH 032/521] [SPARK-43380][SQL] Fix Avro data type conversion issues without causing performance regression ### What changes were proposed in this pull request? My last PR https://github.com/apache/spark/pull/41052 causes AVRO read performance regression since I change the code structure. I turn one match case into a nested match case. So I fix the Avro data type conversion issues in anther way to avoid this regression. 
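For context, a hedged sketch of how the guarded read looks from user code (illustrative only, not part of the patch; the file path and column name are assumed), based on the new `AvroSuite` tests:

```scala
// Sketch only: an Avro file whose field `a` was written as a day-time interval.
val path = "/tmp/avro-interval-data"               // assumed location

// Default behaviour after this change: reading the interval field as a timestamp is
// rejected with AVRO_INCOMPATIBLE_READ_TYPE instead of silently returning corrupt values.
spark.conf.set("spark.sql.legacy.avro.allowIncompatibleSchema", "false")
// spark.read.schema("a TIMESTAMP").format("avro").load(path).collect()   // throws

// Opting back into the legacy, lenient behaviour:
spark.conf.set("spark.sql.legacy.avro.allowIncompatibleSchema", "true")
spark.read.schema("a TIMESTAMP").format("avro").load(path).collect()
```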
Original Change: We introduce the SQLConf `spark.sql.legacy.avro.allowReadingWithIncompatibleSchema` to prevent reading interval types as date or timestamp types to avoid getting corrupt dates as well as reading decimal types with incorrect precision. ### Why are the changes needed? We found the following issues with open source Avro: - Interval types can be read as date or timestamp types that would lead to wildly different results For example, `Duration.ofDays(1).plusSeconds(1)` will be read as `1972-09-27`, which is weird. - Decimal types can be read with lower precision, that leads to data being read as `null` instead of suggesting that a wider decimal format should be provided ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Old unit test Closes #42503 from zeruibao/SPARK-4380-real-fix-regression. Lead-authored-by: zeruibao Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit f8c87f03297e2770e2944e8e8fe097b75f9e8fea) Signed-off-by: Wenchen Fan --- .../main/resources/error/error-classes.json | 5 + .../spark/sql/avro/AvroDeserializer.scala | 46 ++++- .../org/apache/spark/sql/avro/AvroSuite.scala | 158 ++++++++++++++++++ docs/sql-error-conditions.md | 6 + docs/sql-migration-guide.md | 1 + .../sql/errors/QueryCompilationErrors.scala | 16 ++ .../apache/spark/sql/internal/SQLConf.scala | 12 ++ 7 files changed, 235 insertions(+), 9 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 477fe9b3f614e..9bc65ae32a276 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -69,6 +69,11 @@ } } }, + "AVRO_INCOMPATIBLE_READ_TYPE" : { + "message" : [ + "Cannot convert Avro to SQL because the original encoded data type is , however you're trying to read the field as , which would lead to an incorrect answer. To allow reading this field, enable the SQL configuration: \"spark.sql.legacy.avro.allowIncompatibleSchema\"." + ] + }, "BATCH_METADATA_NOT_FOUND" : { "message" : [ "Unable to find batch ." 
diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala index a78ee89a3e933..e82116eec1e9c 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -35,8 +35,9 @@ import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeArr import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, DateTimeUtils, GenericArrayData} import org.apache.spark.sql.catalyst.util.DateTimeConstants.MILLIS_PER_DAY import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.execution.datasources.DataSourceUtils -import org.apache.spark.sql.internal.LegacyBehaviorPolicy +import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -117,6 +118,10 @@ private[sql] class AvroDeserializer( val incompatibleMsg = errorPrefix + s"schema is incompatible (avroType = $avroType, sqlType = ${catalystType.sql})" + val realDataType = SchemaConverters.toSqlType(avroType).dataType + val confKey = SQLConf.LEGACY_AVRO_ALLOW_INCOMPATIBLE_SCHEMA + val preventReadingIncorrectType = !SQLConf.get.getConf(confKey) + (avroType.getType, catalystType) match { case (NULL, NullType) => (updater, ordinal, _) => updater.setNullAt(ordinal) @@ -128,9 +133,19 @@ private[sql] class AvroDeserializer( case (INT, IntegerType) => (updater, ordinal, value) => updater.setInt(ordinal, value.asInstanceOf[Int]) + case (INT, dt: DatetimeType) + if preventReadingIncorrectType && realDataType.isInstanceOf[YearMonthIntervalType] => + throw QueryCompilationErrors.avroIncompatibleReadError(toFieldStr(avroPath), + toFieldStr(catalystPath), realDataType.catalogString, dt.catalogString) + case (INT, DateType) => (updater, ordinal, value) => updater.setInt(ordinal, dateRebaseFunc(value.asInstanceOf[Int])) + case (LONG, dt: DatetimeType) + if preventReadingIncorrectType && realDataType.isInstanceOf[DayTimeIntervalType] => + throw QueryCompilationErrors.avroIncompatibleReadError(toFieldStr(avroPath), + toFieldStr(catalystPath), realDataType.catalogString, dt.catalogString) + case (LONG, LongType) => (updater, ordinal, value) => updater.setLong(ordinal, value.asInstanceOf[Long]) @@ -204,17 +219,30 @@ private[sql] class AvroDeserializer( } updater.set(ordinal, bytes) - case (FIXED, _: DecimalType) => (updater, ordinal, value) => + case (FIXED, dt: DecimalType) => val d = avroType.getLogicalType.asInstanceOf[LogicalTypes.Decimal] - val bigDecimal = decimalConversions.fromFixed(value.asInstanceOf[GenericFixed], avroType, d) - val decimal = createDecimal(bigDecimal, d.getPrecision, d.getScale) - updater.setDecimal(ordinal, decimal) + if (preventReadingIncorrectType && + d.getPrecision - d.getScale > dt.precision - dt.scale) { + throw QueryCompilationErrors.avroIncompatibleReadError(toFieldStr(avroPath), + toFieldStr(catalystPath), realDataType.catalogString, dt.catalogString) + } + (updater, ordinal, value) => + val bigDecimal = + decimalConversions.fromFixed(value.asInstanceOf[GenericFixed], avroType, d) + val decimal = createDecimal(bigDecimal, d.getPrecision, d.getScale) + updater.setDecimal(ordinal, decimal) - case (BYTES, _: DecimalType) => (updater, ordinal, value) => + case (BYTES, dt: DecimalType) => val d = 
avroType.getLogicalType.asInstanceOf[LogicalTypes.Decimal] - val bigDecimal = decimalConversions.fromBytes(value.asInstanceOf[ByteBuffer], avroType, d) - val decimal = createDecimal(bigDecimal, d.getPrecision, d.getScale) - updater.setDecimal(ordinal, decimal) + if (preventReadingIncorrectType && + d.getPrecision - d.getScale > dt.precision - dt.scale) { + throw QueryCompilationErrors.avroIncompatibleReadError(toFieldStr(avroPath), + toFieldStr(catalystPath), realDataType.catalogString, dt.catalogString) + } + (updater, ordinal, value) => + val bigDecimal = decimalConversions.fromBytes(value.asInstanceOf[ByteBuffer], avroType, d) + val decimal = createDecimal(bigDecimal, d.getPrecision, d.getScale) + updater.setDecimal(ordinal, decimal) case (RECORD, st: StructType) => // Avro datasource doesn't accept filters with nested attributes. See SPARK-32328. diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index d22a2d3697579..ffb0a49641b59 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -32,6 +32,7 @@ import org.apache.avro.file.{DataFileReader, DataFileWriter} import org.apache.avro.generic.{GenericData, GenericDatumReader, GenericDatumWriter, GenericRecord} import org.apache.avro.generic.GenericData.{EnumSymbol, Fixed} import org.apache.commons.io.FileUtils +import org.apache.commons.lang3.exception.ExceptionUtils import org.apache.spark.{SPARK_VERSION_SHORT, SparkConf, SparkException, SparkUpgradeException} import org.apache.spark.TestUtils.assertExceptionMsg @@ -814,6 +815,163 @@ abstract class AvroSuite } } + test("SPARK-43380: Fix Avro data type conversion" + + " of decimal type to avoid producing incorrect results") { + withTempPath { path => + val confKey = SQLConf.LEGACY_AVRO_ALLOW_INCOMPATIBLE_SCHEMA.key + sql("SELECT 13.1234567890 a").write.format("avro").save(path.toString) + // With the flag disabled, we will throw an exception if there is a mismatch + withSQLConf(confKey -> "false") { + val e = intercept[SparkException] { + spark.read.schema("a DECIMAL(4, 3)").format("avro").load(path.toString).collect() + } + ExceptionUtils.getRootCause(e) match { + case ex: AnalysisException => + checkError( + exception = ex, + errorClass = "AVRO_INCOMPATIBLE_READ_TYPE", + parameters = Map("avroPath" -> "field 'a'", + "sqlPath" -> "field 'a'", + "avroType" -> "decimal\\(12,10\\)", + "sqlType" -> "\"DECIMAL\\(4,3\\)\""), + matchPVals = true + ) + case other => + fail(s"Received unexpected exception", other) + } + } + // The following used to work, so it should still work with the flag enabled + checkAnswer( + spark.read.schema("a DECIMAL(5, 3)").format("avro").load(path.toString), + Row(new java.math.BigDecimal("13.123")) + ) + withSQLConf(confKey -> "true") { + // With the flag enabled, we return a null silently, which isn't great + checkAnswer( + spark.read.schema("a DECIMAL(4, 3)").format("avro").load(path.toString), + Row(null) + ) + checkAnswer( + spark.read.schema("a DECIMAL(5, 3)").format("avro").load(path.toString), + Row(new java.math.BigDecimal("13.123")) + ) + } + } + } + + test("SPARK-43380: Fix Avro data type conversion" + + " of DayTimeIntervalType to avoid producing incorrect results") { + withTempPath { path => + val confKey = SQLConf.LEGACY_AVRO_ALLOW_INCOMPATIBLE_SCHEMA.key + val schema = StructType(Array(StructField("a", DayTimeIntervalType(), false))) + 
val data = Seq(Row(java.time.Duration.ofDays(1).plusSeconds(1))) + + val df = spark.createDataFrame(sparkContext.parallelize(data), schema) + df.write.format("avro").save(path.getCanonicalPath) + + withSQLConf(confKey -> "false") { + Seq("DATE", "TIMESTAMP", "TIMESTAMP_NTZ").foreach { sqlType => + val e = intercept[SparkException] { + spark.read.schema(s"a $sqlType").format("avro").load(path.toString).collect() + } + + ExceptionUtils.getRootCause(e) match { + case ex: AnalysisException => + checkError( + exception = ex, + errorClass = "AVRO_INCOMPATIBLE_READ_TYPE", + parameters = Map("avroPath" -> "field 'a'", + "sqlPath" -> "field 'a'", + "avroType" -> "interval day to second", + "sqlType" -> s""""$sqlType""""), + matchPVals = true + ) + case other => + fail(s"Received unexpected exception", other) + } + } + } + + withSQLConf(confKey -> "true") { + // Allow conversion and do not need to check result + spark.read.schema("a Date").format("avro").load(path.toString) + spark.read.schema("a timestamp").format("avro").load(path.toString) + spark.read.schema("a timestamp_ntz").format("avro").load(path.toString) + } + } + } + + test("SPARK-43380: Fix Avro data type conversion" + + " of YearMonthIntervalType to avoid producing incorrect results") { + withTempPath { path => + val confKey = SQLConf.LEGACY_AVRO_ALLOW_INCOMPATIBLE_SCHEMA.key + val schema = StructType(Array(StructField("a", YearMonthIntervalType(), false))) + val data = Seq(Row(java.time.Period.of(1, 1, 0))) + + val df = spark.createDataFrame(sparkContext.parallelize(data), schema) + df.write.format("avro").save(path.getCanonicalPath) + + withSQLConf(confKey -> "false") { + Seq("DATE", "TIMESTAMP", "TIMESTAMP_NTZ").foreach { sqlType => + val e = intercept[SparkException] { + spark.read.schema(s"a $sqlType").format("avro").load(path.toString).collect() + } + + ExceptionUtils.getRootCause(e) match { + case ex: AnalysisException => + checkError( + exception = ex, + errorClass = "AVRO_INCOMPATIBLE_READ_TYPE", + parameters = Map("avroPath" -> "field 'a'", + "sqlPath" -> "field 'a'", + "avroType" -> "interval year to month", + "sqlType" -> s""""$sqlType""""), + matchPVals = true + ) + case other => + fail(s"Received unexpected exception", other) + } + } + } + + withSQLConf(confKey -> "true") { + // Allow conversion and do not need to check result + spark.read.schema("a Date").format("avro").load(path.toString) + spark.read.schema("a timestamp").format("avro").load(path.toString) + spark.read.schema("a timestamp_ntz").format("avro").load(path.toString) + } + } + } + + Seq( + "time-millis", + "time-micros", + "timestamp-micros", + "timestamp-millis", + "local-timestamp-millis", + "local-timestamp-micros" + ).foreach { timeLogicalType => + test(s"converting $timeLogicalType type to long in avro") { + withTempPath { path => + val df = Seq(100L) + .toDF("dt") + val avroSchema = + s""" + |{ + | "type" : "record", + | "name" : "test_schema", + | "fields" : [ + | {"name": "dt", "type": {"type": "long", "logicalType": "$timeLogicalType"}} + | ] + |}""".stripMargin + df.write.format("avro").option("avroSchema", avroSchema).save(path.getCanonicalPath) + checkAnswer( + spark.read.schema(s"dt long").format("avro").load(path.toString), + Row(100L)) + } + } + } + test("converting some specific sparkSQL types to avro") { withTempPath { tempDir => val testSchema = StructType(Seq( diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index e7df1aa9a4f9c..90d21f9758573 100644 --- a/docs/sql-error-conditions.md +++ 
b/docs/sql-error-conditions.md @@ -87,6 +87,12 @@ Invalid as-of join. For more details see [AS_OF_JOIN](sql-error-conditions-as-of-join-error-class.html) +### AVRO_INCOMPATIBLE_READ_TYPE + +SQLSTATE: none assigned + +Cannot convert Avro `` to SQL `` because the original encoded data type is ``, however you're trying to read the field as ``, which would lead to an incorrect answer. To allow reading this field, enable the SQL configuration: "spark.sql.legacy.avro.allowIncompatibleSchema". + ### BATCH_METADATA_NOT_FOUND [SQLSTATE: 42K03](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 5fc323ec1b0ea..5cf0b28982c24 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -30,6 +30,7 @@ license: | - Since Spark 3.5, the `plan` field is moved from `AnalysisException` to `EnhancedAnalysisException`. - Since Spark 3.5, `spark.sql.optimizer.canChangeCachedPlanOutputPartitioning` is enabled by default. To restore the previous behavior, set `spark.sql.optimizer.canChangeCachedPlanOutputPartitioning` to `false`. - Since Spark 3.5, the `array_insert` function is 1-based for negative indexes. It inserts new element at the end of input arrays for the index -1. To restore the previous behavior, set `spark.sql.legacy.negativeIndexInArrayInsert` to `true`. +- Since Spark 3.5, the Avro will throw `AnalysisException` when reading Interval types as Date or Timestamp types, or reading Decimal types with lower precision. To restore the legacy behavior, set `spark.sql.legacy.avro.allowIncompatibleSchema` to `true` ## Upgrading from Spark SQL 3.3 to 3.4 diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 14882a7006173..9dca2c5f2822e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -3672,6 +3672,22 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat ) } + def avroIncompatibleReadError( + avroPath: String, + sqlPath: String, + avroType: String, + sqlType: String): Throwable = { + new AnalysisException( + errorClass = "AVRO_INCOMPATIBLE_READ_TYPE", + messageParameters = Map( + "avroPath" -> avroPath, + "sqlPath" -> sqlPath, + "avroType" -> avroType, + "sqlType" -> toSQLType(sqlType) + ) + ) + } + def optionMustBeLiteralString(key: String): Throwable = { new AnalysisException( errorClass = "INVALID_SQL_SYNTAX.OPTION_IS_INVALID", diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 2e0ce7c4dea9d..73d3756ef6b93 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -4251,6 +4251,18 @@ object SQLConf { .booleanConf .createWithDefault(false) + val LEGACY_AVRO_ALLOW_INCOMPATIBLE_SCHEMA = + buildConf("spark.sql.legacy.avro.allowIncompatibleSchema") + .internal() + .doc("When set to false, if types in Avro are encoded in the same format, but " + + "the type in the Avro schema explicitly says that the data types are different, " + + "reject reading the data type in the format to avoid returning incorrect results. 
" + + "When set to true, it restores the legacy behavior of allow reading the data in the" + + " format, which may return incorrect results.") + .version("3.5.1") + .booleanConf + .createWithDefault(false) + val LEGACY_NON_IDENTIFIER_OUTPUT_CATALOG_NAME = buildConf("spark.sql.legacy.v1IdentifierNoCatalog") .internal() From 917bc8cb92728267fb93891f4ef9da13c06e4589 Mon Sep 17 00:00:00 2001 From: Yihong He Date: Thu, 28 Sep 2023 12:58:07 -0400 Subject: [PATCH 033/521] [SPARK-45360][SQL][CONNECT] Initialize spark session builder configuration from SPARK_REMOTE ### What changes were proposed in this pull request? - Initialize spark session builder configuration from SPARK_REMOTE ### Why are the changes needed? - `SparkSession.builder().getOrCreate()` should follow the behavior documents [here](https://github.com/apache/spark/blob/2cc1ee4d3a05a641d7a245f015ef824d8f7bae8b/docs/spark-connect-overview.md?plain=1#L241-L244) and support initialization from SPARK_REMOTE ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - `build/sbt "connect-client-jvm/testOnly *SparkConnectClientSuite"` ### Was this patch authored or co-authored using generative AI tooling? Closes #43153 from heyihong/SPARK-45360. Authored-by: Yihong He Signed-off-by: Herman van Hovell (cherry picked from commit 183a3d761f36d35572cfb37ab921b6a86f8f28ed) Signed-off-by: Herman van Hovell --- .../org/apache/spark/sql/SparkSession.scala | 5 +- .../client/SparkConnectClientSuite.scala | 61 +++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala index 7bd8fa59aea8f..421f37b9e8a62 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -783,7 +783,10 @@ object SparkSession extends Logging { } class Builder() extends Logging { - private val builder = SparkConnectClient.builder() + // Initialize the connection string of the Spark Connect client builder from SPARK_REMOTE + // by default, if it exists. The connection string can be overridden using + // the remote() function, as it takes precedence over the SPARK_REMOTE environment variable. 
+ private val builder = SparkConnectClient.builder().loadFromEnvironment() private var client: SparkConnectClient = _ private[this] val options = new scala.collection.mutable.HashMap[String, String] diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala index 80e245ec78b7d..89acc2c60ac21 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/connect/client/SparkConnectClientSuite.scala @@ -86,6 +86,24 @@ class SparkConnectClientSuite extends ConnectFunSuite with BeforeAndAfterEach { assert(response.getSessionId === "abc123") } + private def withEnvs(pairs: (String, String)*)(f: => Unit): Unit = { + val readonlyEnv = System.getenv() + val field = readonlyEnv.getClass.getDeclaredField("m") + field.setAccessible(true) + val modifiableEnv = field.get(readonlyEnv).asInstanceOf[java.util.Map[String, String]] + try { + for ((k, v) <- pairs) { + assert(!modifiableEnv.containsKey(k)) + modifiableEnv.put(k, v) + } + f + } finally { + for ((k, _) <- pairs) { + modifiableEnv.remove(k) + } + } + } + test("Test connection") { testClientConnection() { testPort => SparkConnectClient.builder().port(testPort).build() } } @@ -112,6 +130,49 @@ class SparkConnectClientSuite extends ConnectFunSuite with BeforeAndAfterEach { } } + test("SparkSession create with SPARK_REMOTE") { + startDummyServer(0) + + withEnvs("SPARK_REMOTE" -> s"sc://localhost:${server.getPort}") { + val session = SparkSession.builder().create() + val df = session.range(10) + df.analyze // Trigger RPC + assert(df.plan === service.getAndClearLatestInputPlan()) + + val session2 = SparkSession.builder().create() + assert(session != session2) + } + } + + test("SparkSession getOrCreate with SPARK_REMOTE") { + startDummyServer(0) + + withEnvs("SPARK_REMOTE" -> s"sc://localhost:${server.getPort}") { + val session = SparkSession.builder().getOrCreate() + + val df = session.range(10) + df.analyze // Trigger RPC + assert(df.plan === service.getAndClearLatestInputPlan()) + + val session2 = SparkSession.builder().getOrCreate() + assert(session === session2) + } + } + + test("Builder.remote takes precedence over SPARK_REMOTE") { + startDummyServer(0) + val incorrectUrl = s"sc://localhost:${server.getPort + 1}" + + withEnvs("SPARK_REMOTE" -> incorrectUrl) { + val session = + SparkSession.builder().remote(s"sc://localhost:${server.getPort}").getOrCreate() + + val df = session.range(10) + df.analyze // Trigger RPC + assert(df.plan === service.getAndClearLatestInputPlan()) + } + } + test("SparkSession initialisation with connection string") { startDummyServer(0) client = SparkConnectClient From 3a723a171b1b77a8a8b2ccce2bd489acb8db00a3 Mon Sep 17 00:00:00 2001 From: Hasnain Lakhani Date: Thu, 28 Sep 2023 18:16:49 -0500 Subject: [PATCH 034/521] [SPARK-44937][CORE] Mark connection as timedOut in TransportClient.close ### What changes were proposed in this pull request? This PR avoids a race condition where a connection which is in the process of being closed could be returned by the TransportClientFactory only to be immediately closed and cause errors upon use. 
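For intuition, a minimal sketch (in Scala, with invented names; the real change is in the Java `TransportClient`) of the guard this patch adds: the connection is flagged as unusable before the potentially slow close begins, so a pooling factory that checks the flag can never hand out a half-closed connection.

```scala
// Illustrative pattern only, not Spark's actual API.
class PooledConnection {
  @volatile private var timedOut = false
  def isActive: Boolean = !timedOut      // the client factory checks this before reuse
  def close(): Unit = {
    timedOut = true                      // mark unusable first ...
    // ... then perform the (possibly slow, e.g. SSL) channel shutdown with a timeout
  }
}
```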
This race condition is rare and not easily triggered, but with the upcoming changes to introduce SSL connection support, connection closing can take just a slight bit longer and it's much easier to trigger this issue. Looking at the history of the code I believe this was an oversight in https://github.com/apache/spark/pull/9853. ### Why are the changes needed? Without this change, some of the new tests added in https://github.com/apache/spark/pull/42685 would fail ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests were run in CI. Without this change, some of the new tests added in https://github.com/apache/spark/pull/42685 fail ### Was this patch authored or co-authored using generative AI tooling? No Closes #43162 from hasnain-db/spark-tls-timeout. Authored-by: Hasnain Lakhani Signed-off-by: Mridul Muralidharan gmail.com> (cherry picked from commit 2a88feadd4b7cec9e01bc744e589783e3390e5ce) Signed-off-by: Mridul Muralidharan --- .../org/apache/spark/network/client/TransportClient.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java index 4a0a156699852..40825e06b82fd 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java @@ -325,7 +325,10 @@ public TransportResponseHandler getHandler() { @Override public void close() { - // close is a local operation and should finish with milliseconds; timeout just to be safe + // Mark the connection as timed out, so we do not return a connection that's being closed + // from the TransportClientFactory if closing takes some time (e.g. with SSL) + this.timedOut = true; + // close should not take this long; use a timeout just to be safe channel.close().awaitUninterruptibly(10, TimeUnit.SECONDS); } From aeba488ccd9213d28e6401d1bf7eadfdb9d955c3 Mon Sep 17 00:00:00 2001 From: Warren Zhu Date: Thu, 28 Sep 2023 18:51:33 -0500 Subject: [PATCH 035/521] [SPARK-45057][CORE] Avoid acquire read lock when keepReadLock is false ### What changes were proposed in this pull request? Add `keepReadLock` parameter in `lockNewBlockForWriting()`. When `keepReadLock` is `false`, skip `lockForReading()` to avoid block on read Lock or potential deadlock issue. When 2 tasks try to compute same rdd with replication level of 2 and running on only 2 executors. Deadlock will happen. Details refer [SPARK-45057] Task thread hold write lock and waiting for replication to remote executor while shuffle server thread which handling block upload request waiting on `lockForReading` in [BlockInfoManager.scala](https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala#L457C24-L457C24) ### Why are the changes needed? This could save unnecessary read lock acquire and avoid deadlock issue mention above. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added UT in BlockInfoManagerSuite ### Was this patch authored or co-authored using generative AI tooling? No Closes #43067 from warrenzhu25/deadlock. 
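For readers less familiar with the terminology in the scenario above, "replication level of 2" corresponds to a replicated storage level; a minimal sketch of a job shape that exercises it (an existing `sc` with two executors is assumed, and the map function is just a stand-in):

```scala
// Sketch only: cache with two replicas, so each computed block is pushed to a second executor.
import org.apache.spark.storage.StorageLevel
val rdd = sc.parallelize(1 to 1000, 4)
  .map(i => i * i)                            // stand-in for real work
  .persist(StorageLevel.MEMORY_ONLY_2)        // two replicas per block
rdd.count()
```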
Authored-by: Warren Zhu Signed-off-by: Mridul Muralidharan gmail.com> (cherry picked from commit 0d6fda5bbee99f9d1821952195efc6764816ec2f) Signed-off-by: Mridul Muralidharan --- .../apache/spark/storage/BlockInfoManager.scala | 11 +++++++---- .../org/apache/spark/storage/BlockManager.scala | 6 +----- .../spark/storage/BlockInfoManagerSuite.scala | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala index 45ebb6eafa69f..ab4073fe8c05c 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockInfoManager.scala @@ -415,13 +415,14 @@ private[storage] class BlockInfoManager(trackingCacheVisibility: Boolean = false * then just go ahead and acquire the write lock. Otherwise, if another thread is already * writing the block, then we wait for the write to finish before acquiring the read lock. * - * @return true if the block did not already exist, false otherwise. If this returns false, then - * a read lock on the existing block will be held. If this returns true, a write lock on - * the new block will be held. + * @return true if the block did not already exist, false otherwise. + * If this returns true, a write lock on the new block will be held. + * If this returns false then a read lock will be held iff keepReadLock == true. */ def lockNewBlockForWriting( blockId: BlockId, - newBlockInfo: BlockInfo): Boolean = { + newBlockInfo: BlockInfo, + keepReadLock: Boolean = true): Boolean = { logTrace(s"Task $currentTaskAttemptId trying to put $blockId") // Get the lock that will be associated with the to-be written block and lock it for the entire // duration of this operation. This way we prevent race conditions when two threads try to write @@ -449,6 +450,8 @@ private[storage] class BlockInfoManager(trackingCacheVisibility: Boolean = false val result = lockForWriting(blockId, blocking = false) assert(result.isDefined) return true + } else if (!keepReadLock) { + return false } else { // Block already exists. This could happen if another thread races with us to compute // the same block. 
In this case we try to acquire a read lock, if the locking succeeds diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 05d57c67576a5..6de6069d2fea5 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -1510,14 +1510,10 @@ private[spark] class BlockManager( val putBlockInfo = { val newInfo = new BlockInfo(level, classTag, tellMaster) - if (blockInfoManager.lockNewBlockForWriting(blockId, newInfo)) { + if (blockInfoManager.lockNewBlockForWriting(blockId, newInfo, keepReadLock)) { newInfo } else { logWarning(s"Block $blockId already exists on this machine; not re-adding it") - if (!keepReadLock) { - // lockNewBlockForWriting returned a read lock on the existing block, so we must free it: - releaseLock(blockId) - } return None } } diff --git a/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala index 3708f0aa67223..f133a38269d71 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockInfoManagerSuite.scala @@ -166,6 +166,20 @@ class BlockInfoManagerSuite extends SparkFunSuite { assert(blockInfoManager.get("block").get.readerCount === 1) } + test("lockNewBlockForWriting should not block when keepReadLock is false") { + withTaskId(0) { + assert(blockInfoManager.lockNewBlockForWriting("block", newBlockInfo())) + } + val lock1Future = Future { + withTaskId(1) { + blockInfoManager.lockNewBlockForWriting("block", newBlockInfo(), false) + } + } + + assert(!ThreadUtils.awaitResult(lock1Future, 1.seconds)) + assert(blockInfoManager.get("block").get.readerCount === 0) + } + test("read locks are reentrant") { withTaskId(1) { assert(blockInfoManager.lockNewBlockForWriting("block", newBlockInfo())) From 5cdb4ab6f0b7aea7d890ee7dff61350671a09e79 Mon Sep 17 00:00:00 2001 From: Bo Xiong Date: Thu, 28 Sep 2023 22:53:37 -0500 Subject: [PATCH 036/521] [SPARK-45227][CORE] Fix a subtle thread-safety issue with CoarseGrainedExecutorBackend ### What changes were proposed in this pull request? Fix a subtle thread-safety issue with CoarseGrainedExecutorBackend where an executor process randomly gets stuck ### Why are the changes needed? For each executor, the single-threaded dispatcher can run into an "infinite loop" (as explained in the SPARK-45227). Once an executor process runs into a state, it'd stop launching tasks from the driver or reporting task status back. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? ``` $ build/mvn package -DskipTests -pl core $ build/mvn -Dtest=none -DwildcardSuites=org.apache.spark.executor.CoarseGrainedExecutorBackendSuite test ``` ### Was this patch authored or co-authored using generative AI tooling? No ****************************************************************************** **_Please feel free to skip reading unless you're interested in details_** ****************************************************************************** ### Symptom Our Spark 3 app running on EMR 6.10.0 with Spark 3.3.1 got stuck in the very last step of writing a data frame to S3 by calling `df.write`. Looking at Spark UI, we saw that an executor process hung over 1 hour. After we manually killed the executor process, the app succeeded. 
Note that the same EMR cluster with two worker nodes was able to run the same app without any issue before and after the incident. Below is what's observed from relevant container logs and thread dump. - A regular task that's sent to the executor, which also reported back to the driver upon the task completion. ``` $zgrep 'task 150' container_1694029806204_12865_01_000001/stderr.gz 23/09/12 18:13:55 INFO TaskSetManager: Starting task 150.0 in stage 23.0 (TID 923) (ip-10-0-185-107.ec2.internal, executor 3, partition 150, NODE_LOCAL, 4432 bytes) taskResourceAssignments Map() 23/09/12 18:13:55 INFO TaskSetManager: Finished task 150.0 in stage 23.0 (TID 923) in 126 ms on ip-10-0-185-107.ec2.internal (executor 3) (16/200) $zgrep ' 923' container_1694029806204_12865_01_000004/stderr.gz 23/09/12 18:13:55 INFO YarnCoarseGrainedExecutorBackend: Got assigned task 923 $zgrep 'task 150' container_1694029806204_12865_01_000004/stderr.gz 23/09/12 18:13:55 INFO Executor: Running task 150.0 in stage 23.0 (TID 923) 23/09/12 18:13:55 INFO Executor: Finished task 150.0 in stage 23.0 (TID 923). 4495 bytes result sent to driver ``` - Another task that's sent to the executor but didn't get launched since the single-threaded dispatcher was stuck (presumably in an "infinite loop" as explained later). ``` $zgrep 'task 153' container_1694029806204_12865_01_000001/stderr.gz 23/09/12 18:13:55 INFO TaskSetManager: Starting task 153.0 in stage 23.0 (TID 924) (ip-10-0-185-107.ec2.internal, executor 3, partition 153, NODE_LOCAL, 4432 bytes) taskResourceAssignments Map() $zgrep ' 924' container_1694029806204_12865_01_000004/stderr.gz 23/09/12 18:13:55 INFO YarnCoarseGrainedExecutorBackend: Got assigned task 924 $zgrep 'task 153' container_1694029806204_12865_01_000004/stderr.gz >> note that the above command has no matching result, indicating that task 153.0 in stage 23.0 (TID 924) was never launched ``` - Thread dump shows that the dispatcher-Executor thread has the following stack trace. 
``` "dispatcher-Executor" #40 daemon prio=5 os_prio=0 tid=0x0000ffff98e37800 nid=0x1aff runnable [0x0000ffff73bba000] java.lang.Thread.State: RUNNABLE at scala.runtime.BoxesRunTime.equalsNumObject(BoxesRunTime.java:142) at scala.runtime.BoxesRunTime.equals2(BoxesRunTime.java:131) at scala.runtime.BoxesRunTime.equals(BoxesRunTime.java:123) at scala.collection.mutable.HashTable.elemEquals(HashTable.scala:365) at scala.collection.mutable.HashTable.elemEquals$(HashTable.scala:365) at scala.collection.mutable.HashMap.elemEquals(HashMap.scala:44) at scala.collection.mutable.HashTable.findEntry0(HashTable.scala:140) at scala.collection.mutable.HashTable.findOrAddEntry(HashTable.scala:169) at scala.collection.mutable.HashTable.findOrAddEntry$(HashTable.scala:167) at scala.collection.mutable.HashMap.findOrAddEntry(HashMap.scala:44) at scala.collection.mutable.HashMap.put(HashMap.scala:126) at scala.collection.mutable.HashMap.update(HashMap.scala:131) at org.apache.spark.executor.CoarseGrainedExecutorBackend$$anonfun$receive$1.applyOrElse(CoarseGrainedExecutorBackend.scala:200) at org.apache.spark.rpc.netty.Inbox.$anonfun$process$1(Inbox.scala:115) at org.apache.spark.rpc.netty.Inbox$$Lambda$323/1930826709.apply$mcV$sp(Unknown Source) at org.apache.spark.rpc.netty.Inbox.safelyCall(Inbox.scala:213) at org.apache.spark.rpc.netty.Inbox.process(Inbox.scala:100) at org.apache.spark.rpc.netty.MessageLoop.org$apache$spark$rpc$netty$MessageLoop$$receiveLoop(MessageLoop.scala:75) at org.apache.spark.rpc.netty.MessageLoop$$anon$1.run(MessageLoop.scala:41) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.run(FutureTask.java:266) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:750) ``` ### Relevant code paths Within an executor process, there's a [dispatcher thread](https://github.com/apache/spark/blob/1fdd46f173f7bc90e0523eb0a2d5e8e27e990102/core/src/main/scala/org/apache/spark/rpc/netty/MessageLoop.scala#L170) dedicated to CoarseGrainedExecutorBackend(a single RPC endpoint) that launches tasks scheduled by the driver. Each task is run on a TaskRunner thread backed by a thread pool created for the executor. The TaskRunner thread and the dispatcher thread are different. However, they read and write a common object (i.e., taskResources) that's a mutable hashmap without thread-safety, in [Executor](https://github.com/apache/spark/blob/1fdd46f173f7bc90e0523eb0a2d5e8e27e990102/core/src/main/scala/org/apache/spark/executor/Executor.scala#L561) and [CoarseGrainedExecutorBackend](https://github.com/apache/spark/blob/1fdd46f173f7bc90e0523eb0a2d5e8e27e990102/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala#L189), respectively. ### What's going on? Based on the above observations, our hypothesis is that the dispatcher thread runs into an "infinite loop" due to a race condition when two threads access the same hashmap object. For illustration purpose, let's consider the following scenario where two threads (Thread 1 and Thread 2) access a hash table without thread-safety - Thread 1 sees A.next = B, but then yields execution to Thread 2 - Thread 2 triggers a resize operation resulting in B.next = A (Note that hashmap doesn't care about ordering), and then yields execution to Thread 1. 
- After taking over CPU, Thread 1 would run into an "infinite loop" when traversing the list in the last bucket, given A.next = B and B.next = A in its view. Closes #43021 from xiongbo-sjtu/master. Authored-by: Bo Xiong Signed-off-by: Mridul Muralidharan gmail.com> (cherry picked from commit 8e6b1603a66706ee27a0b16d850f5ee56d633354) Signed-off-by: Mridul Muralidharan --- .../spark/executor/CoarseGrainedExecutorBackend.scala | 11 +++++++---- .../executor/CoarseGrainedExecutorBackendSuite.scala | 6 +++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index ab238626efe9b..c695a9ec2851b 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -20,9 +20,9 @@ package org.apache.spark.executor import java.net.URL import java.nio.ByteBuffer import java.util.Locale +import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicBoolean -import scala.collection.mutable import scala.util.{Failure, Success} import scala.util.control.NonFatal @@ -71,9 +71,12 @@ private[spark] class CoarseGrainedExecutorBackend( /** * Map each taskId to the information about the resource allocated to it, Please refer to * [[ResourceInformation]] for specifics. + * CHM is used to ensure thread-safety (https://issues.apache.org/jira/browse/SPARK-45227) * Exposed for testing only. */ - private[executor] val taskResources = new mutable.HashMap[Long, Map[String, ResourceInformation]] + private[executor] val taskResources = new ConcurrentHashMap[ + Long, Map[String, ResourceInformation] + ] private var decommissioned = false @@ -184,7 +187,7 @@ private[spark] class CoarseGrainedExecutorBackend( } else { val taskDesc = TaskDescription.decode(data.value) logInfo("Got assigned task " + taskDesc.taskId) - taskResources(taskDesc.taskId) = taskDesc.resources + taskResources.put(taskDesc.taskId, taskDesc.resources) executor.launchTask(this, taskDesc) } @@ -261,7 +264,7 @@ private[spark] class CoarseGrainedExecutorBackend( } override def statusUpdate(taskId: Long, state: TaskState, data: ByteBuffer): Unit = { - val resources = taskResources.getOrElse(taskId, Map.empty[String, ResourceInformation]) + val resources = taskResources.getOrDefault(taskId, Map.empty[String, ResourceInformation]) val cpus = executor.runningTasks.get(taskId).taskDescription.cpus val msg = StatusUpdate(executorId, taskId, state, data, cpus, resources) if (TaskState.isFinished(state)) { diff --git a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala index 0dcc7c7f9b4cf..909d605442575 100644 --- a/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/executor/CoarseGrainedExecutorBackendSuite.scala @@ -302,7 +302,7 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite resourceProfile = ResourceProfile.getOrCreateDefaultProfile(conf)) assert(backend.taskResources.isEmpty) - val taskId = 1000000 + val taskId = 1000000L // We don't really verify the data, just pass it around. 
val data = ByteBuffer.wrap(Array[Byte](1, 2, 3, 4)) val taskDescription = new TaskDescription(taskId, 2, "1", "TASK 1000000", 19, @@ -339,14 +339,14 @@ class CoarseGrainedExecutorBackendSuite extends SparkFunSuite backend.self.send(LaunchTask(new SerializableBuffer(serializedTaskDescription))) eventually(timeout(10.seconds)) { assert(backend.taskResources.size == 1) - val resources = backend.taskResources(taskId) + val resources = backend.taskResources.get(taskId) assert(resources(GPU).addresses sameElements Array("0", "1")) } // Update the status of a running task shall not affect `taskResources` map. backend.statusUpdate(taskId, TaskState.RUNNING, data) assert(backend.taskResources.size == 1) - val resources = backend.taskResources(taskId) + val resources = backend.taskResources.get(taskId) assert(resources(GPU).addresses sameElements Array("0", "1")) // Update the status of a finished task shall remove the entry from `taskResources` map. From 845e4f6c5bcf3a368ee78757f3a74b390cdce5c0 Mon Sep 17 00:00:00 2001 From: Peter Kaszt Date: Mon, 2 Oct 2023 07:48:56 -0500 Subject: [PATCH 037/521] [MINOR][DOCS] Fix Python code sample for StreamingQueryListener: Reporting Metrics programmatically using Asynchronous APIs Fix Python language code sample in the docs for _StreamingQueryListener_: Reporting Metrics programmatically using Asynchronous APIs section. ### What changes were proposed in this pull request? The code sample in the [Reporting Metrics programmatically using Asynchronous APIs](https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html#reporting-metrics-programmatically-using-asynchronous-apis) section was this: ``` spark = ... class Listener(StreamingQueryListener): def onQueryStarted(self, event): print("Query started: " + queryStarted.id) def onQueryProgress(self, event): println("Query terminated: " + queryTerminated.id) def onQueryTerminated(self, event): println("Query made progress: " + queryProgress.progress) spark.streams.addListener(Listener()) ``` This is not valid Python code, and the QueryProgress and QueryTerminated prints are swapped. Proposed change/fix: ``` spark = ... class Listener(StreamingQueryListener): def onQueryStarted(self, event): print("Query started: " + queryStarted.id) def onQueryProgress(self, event): print("Query made progress: " + queryProgress.progress) def onQueryTerminated(self, event): print("Query terminated: " + queryTerminated.id) spark.streams.addListener(Listener()) ``` ### Why are the changes needed? To fix documentation errors. ### Does this PR introduce _any_ user-facing change? Yes. -> Sample Python code snippet is fixed in docs (see above). ### How was this patch tested? Checked with GitHub's .md preview, and built the docs according to the readme. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43190 from kasztp/master.
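For reference, even the corrected sample above keeps the original field references (`queryStarted.id`, `queryProgress.progress`, `queryTerminated.id`), which are not names defined inside those methods; the listener only receives the `event` argument. Below is a self-contained sketch of the same listener that reads everything from `event`. It assumes a locally built SparkSession in place of the guide's `spark = ...` placeholder, and it defines `onQueryIdle` defensively even though recent PySpark versions ship a no-op default for it.

```python
from pyspark.sql import SparkSession
from pyspark.sql.streaming import StreamingQueryListener

# Assumption: a local session is created here instead of the guide's `spark = ...` placeholder.
spark = SparkSession.builder.appName("listener-example").getOrCreate()


class Listener(StreamingQueryListener):
    def onQueryStarted(self, event):
        # QueryStartedEvent carries the query id (a UUID), so convert it explicitly.
        print("Query started: " + str(event.id))

    def onQueryProgress(self, event):
        # QueryProgressEvent carries a StreamingQueryProgress object.
        print("Query made progress: " + str(event.progress))

    def onQueryIdle(self, event):
        # Defined defensively; newer PySpark versions already provide a no-op default.
        pass

    def onQueryTerminated(self, event):
        print("Query terminated: " + str(event.id))


spark.streams.addListener(Listener())
```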
Authored-by: Peter Kaszt Signed-off-by: Sean Owen (cherry picked from commit d708fd7b68bf0c9964e861cb2c81818d17d7136e) Signed-off-by: Sean Owen --- docs/structured-streaming-programming-guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 76a22621a0e32..3e87c45a34915 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -3831,10 +3831,10 @@ class Listener(StreamingQueryListener): print("Query started: " + queryStarted.id) def onQueryProgress(self, event): - println("Query terminated: " + queryTerminated.id) + print("Query made progress: " + queryProgress.progress) def onQueryTerminated(self, event): - println("Query made progress: " + queryProgress.progress) + print("Query terminated: " + queryTerminated.id) spark.streams.addListener(Listener()) From c5203abcbd191423071ef3603e95a7941bb1eec2 Mon Sep 17 00:00:00 2001 From: Herman van Hovell Date: Mon, 2 Oct 2023 13:03:06 -0400 Subject: [PATCH 038/521] [SPARK-45371][CONNECT] Fix shading issues in the Spark Connect Scala Client ### What changes were proposed in this pull request? This PR fixes shading for the Spark Connect Scala Client maven build. The following things are addressed: - Guava and protobuf are included in the shaded jars. These were missing, and were causing users to see `ClassNotFoundException`s. - Fixed duplicate shading of guava. We use the parent pom's location now. - Fixed duplicate Netty dependency (shaded and transitive). One was used for GRPC and the other was needed by Arrow. This was fixed by pulling arrow into the shaded jar. - Use the same package as the shading defined in the parent package. ### Why are the changes needed? The maven artifacts for the Spark Connect Scala Client are currently broken. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual tests. #### Step 1: Build new shaded library and install it in local maven repository `build/mvn clean install -pl connector/connect/client/jvm -am -DskipTests` #### Step 2: Start Connect Server `connector/connect/bin/spark-connect` #### Step 3: Launch REPL using the newly created library This step requires [coursier](https://get-coursier.io/) to be installed. 
`cs launch --jvm zulu:17.0.8 --scala 2.13.9 -r m2Local com.lihaoyi:::ammonite:2.5.11 org.apache.spark::spark-connect-client-jvm:4.0.0-SNAPSHOT --java-opt --add-opens=java.base/java.nio=ALL-UNNAMED -M org.apache.spark.sql.application.ConnectRepl` #### Step 4: Run a bunch of commands: ```scala // Check version spark.version // Run a simple query { spark.range(1, 10000, 1) .select($"id", $"id" % 5 as "group", rand(1).as("v1"), rand(2).as("v2")) .groupBy($"group") .agg( avg($"v1").as("v1_avg"), avg($"v2").as("v2_avg")) .show() } // Run a streaming query { import org.apache.spark.sql.execution.streaming.ProcessingTimeTrigger val query_name = "simple_streaming" val stream = spark.readStream .format("rate") .option("numPartitions", "1") .option("rowsPerSecond", "10") .load() .withWatermark("timestamp", "10 milliseconds") .groupBy(window(col("timestamp"), "10 milliseconds")) .count() .selectExpr("window.start as timestamp", "count as num_events") .writeStream .format("memory") .queryName(query_name) .trigger(ProcessingTimeTrigger.create("10 milliseconds")) // run for 20 seconds val query = stream.start() val start = System.currentTimeMillis() val end = System.currentTimeMillis() + 20 * 1000 while (System.currentTimeMillis() < end) { println(s"time: ${System.currentTimeMillis() - start} ms") println(query.status) spark.sql(s"select * from ${query_name}").show() Thread.sleep(500) } query.stop() } ``` Closes #43195 from hvanhovell/SPARK-45371. Authored-by: Herman van Hovell Signed-off-by: Herman van Hovell (cherry picked from commit e53abbbceaa2c41babaa23fe4c2f282f559b4c03) Signed-off-by: Herman van Hovell --- connector/connect/client/jvm/pom.xml | 39 +++++++++++++++++++++------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 67227ef38eb8f..236e5850b762f 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -50,10 +50,20 @@ spark-sketch_${scala.binary.version} ${project.version} + com.google.guava guava ${connect.guava.version} + compile + + + com.google.protobuf + protobuf-java + compile com.lihaoyi @@ -85,6 +95,7 @@ maven-shade-plugin false + true com.google.android:* @@ -92,52 +103,62 @@ com.google.code.findbugs:* com.google.code.gson:* com.google.errorprone:* - com.google.guava:* com.google.j2objc:* com.google.protobuf:* + com.google.flatbuffers:* io.grpc:* io.netty:* io.perfmark:* + org.apache.arrow:* org.codehaus.mojo:* org.checkerframework:* org.apache.spark:spark-connect-common_${scala.binary.version} + org.apache.spark:spark-sql-api_${scala.binary.version} io.grpc - ${spark.shade.packageName}.connect.client.io.grpc + ${spark.shade.packageName}.io.grpc io.grpc.** com.google - ${spark.shade.packageName}.connect.client.com.google + ${spark.shade.packageName}.com.google + + + com.google.common.** + io.netty - ${spark.shade.packageName}.connect.client.io.netty + ${spark.shade.packageName}.io.netty org.checkerframework - ${spark.shade.packageName}.connect.client.org.checkerframework + ${spark.shade.packageName}.org.checkerframework javax.annotation - ${spark.shade.packageName}.connect.client.javax.annotation + ${spark.shade.packageName}.javax.annotation io.perfmark - ${spark.shade.packageName}.connect.client.io.perfmark + ${spark.shade.packageName}.io.perfmark org.codehaus - ${spark.shade.packageName}.connect.client.org.codehaus + ${spark.shade.packageName}.org.codehaus + + + org.apache.arrow + ${spark.shade.packageName}.org.apache.arrow android.annotation - 
${spark.shade.packageName}.connect.client.android.annotation + ${spark.shade.packageName}.android.annotation From 522af69713502d33d34b330bce6166e3d15dba8a Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 5 Oct 2023 09:39:36 +0900 Subject: [PATCH 039/521] Revert "[SPARK-45167][CONNECT][PYTHON][FOLLOW-UP] Use lighter threading Rlock, and use the existing eventually util function" This reverts commit 2a9dd2b3968da7c2e96c502aaf4c158ee782e5f4. --- python/pyspark/sql/connect/client/reattach.py | 5 ++-- .../sql/tests/connect/client/test_client.py | 23 +++++++++++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/python/pyspark/sql/connect/client/reattach.py b/python/pyspark/sql/connect/client/reattach.py index 6addb5bd2c652..e58864b965bd9 100644 --- a/python/pyspark/sql/connect/client/reattach.py +++ b/python/pyspark/sql/connect/client/reattach.py @@ -18,11 +18,12 @@ check_dependencies(__name__) -from threading import RLock import warnings import uuid from collections.abc import Generator from typing import Optional, Dict, Any, Iterator, Iterable, Tuple, Callable, cast, Type, ClassVar +from multiprocessing import RLock +from multiprocessing.synchronize import RLock as RLockBase from multiprocessing.pool import ThreadPool import os @@ -55,7 +56,7 @@ class ExecutePlanResponseReattachableIterator(Generator): """ # Lock to manage the pool - _lock: ClassVar[RLock] = RLock() + _lock: ClassVar[RLockBase] = RLock() _release_thread_pool: Optional[ThreadPool] = ThreadPool(os.cpu_count() if os.cpu_count() else 8) @classmethod diff --git a/python/pyspark/sql/tests/connect/client/test_client.py b/python/pyspark/sql/tests/connect/client/test_client.py index 93b7006799b30..cf43fb16df7a7 100644 --- a/python/pyspark/sql/tests/connect/client/test_client.py +++ b/python/pyspark/sql/tests/connect/client/test_client.py @@ -25,7 +25,6 @@ from pyspark.sql.connect.client import SparkConnectClient, ChannelBuilder import pyspark.sql.connect.proto as proto from pyspark.testing.connectutils import should_test_connect, connect_requirement_message -from pyspark.testing.utils import eventually from pyspark.sql.connect.client.core import Retrying from pyspark.sql.connect.client.reattach import ( @@ -153,6 +152,20 @@ def _stub_with(self, execute=None, attach=None): attach_ops=ResponseGenerator(attach) if attach is not None else None, ) + def assertEventually(self, callable, timeout_ms=1000): + """Helper method that will continuously evaluate the callable to not raise an + exception.""" + import time + + limit = time.monotonic_ns() + timeout_ms * 1000 * 1000 + while time.monotonic_ns() < limit: + try: + callable() + break + except Exception: + time.sleep(0.1) + callable() + def test_basic_flow(self): stub = self._stub_with([self.response, self.finished]) ite = ExecutePlanResponseReattachableIterator(self.request, stub, self.policy, []) @@ -165,7 +178,7 @@ def check_all(): self.assertEqual(1, stub.release_calls) self.assertEqual(1, stub.execute_calls) - eventually(timeout=1, catch_assertions=True)(check_all)() + self.assertEventually(check_all, timeout_ms=1000) def test_fail_during_execute(self): def fatal(): @@ -183,7 +196,7 @@ def check(): self.assertEqual(1, stub.release_until_calls) self.assertEqual(1, stub.execute_calls) - eventually(timeout=1, catch_assertions=True)(check)() + self.assertEventually(check, timeout_ms=1000) def test_fail_and_retry_during_execute(self): def non_fatal(): @@ -202,7 +215,7 @@ def check(): self.assertEqual(3, stub.release_until_calls) self.assertEqual(1, 
stub.execute_calls) - eventually(timeout=1, catch_assertions=True)(check)() + self.assertEventually(check, timeout_ms=1000) def test_fail_and_retry_during_reattach(self): count = 0 @@ -228,7 +241,7 @@ def check(): self.assertEqual(1, stub.release_calls) self.assertEqual(1, stub.execute_calls) - eventually(timeout=1, catch_assertions=True)(check)() + self.assertEventually(check, timeout_ms=1000) class TestException(grpc.RpcError, grpc.Call): From 6468f96ea42f6efe42033507c4e26600b751bfcc Mon Sep 17 00:00:00 2001 From: Emil Ejbyfeldt Date: Thu, 5 Oct 2023 09:41:08 +0900 Subject: [PATCH 040/521] [SPARK-45386][SQL][3.5] Fix correctness issue with persist using StorageLevel.NONE on Dataset ### What changes were proposed in this pull request? Support for InMememoryTableScanExec in AQE was added in #39624, but this patch contained a bug when a Dataset is persisted using `StorageLevel.NONE`. Before that patch a query like: ``` import org.apache.spark.storage.StorageLevel spark.createDataset(Seq(1, 2)).persist(StorageLevel.NONE).count() ``` would correctly return 2. But after that patch it incorrectly returns 0. This is because AQE incorrectly determines based on the runtime statistics that are collected here: https://github.com/apache/spark/blob/eac5a8c7e6da94bb27e926fc9a681aed6582f7d3/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala#L294 that the input is empty. The problem is that the action that should make sure the statistics are collected here https://github.com/apache/spark/blob/eac5a8c7e6da94bb27e926fc9a681aed6582f7d3/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageExec.scala#L285-L291 never use the iterator and when we have `StorageLevel.NONE` the persisting will also not use the iterator and we will not gather the correct statistics. The proposed fix in the patch just make calling persist with StorageLevel.NONE a no-op. Changing the action since it always "emptied" the iterator would also work but seems like that would be unnecessary work in a lot of normal circumstances. ### Why are the changes needed? The current code has a correctness issue. ### Does this PR introduce _any_ user-facing change? Yes, fixes the correctness issue. ### How was this patch tested? New and existing unit tests. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43213 from eejbyfeldt/SPARK-45386-branch-3.5. Authored-by: Emil Ejbyfeldt Signed-off-by: Hyukjin Kwon --- .../scala/org/apache/spark/sql/execution/CacheManager.scala | 4 +++- .../src/test/scala/org/apache/spark/sql/DatasetSuite.scala | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 064819275e004..e906c74f8a5ee 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -113,7 +113,9 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { planToCache: LogicalPlan, tableName: Option[String], storageLevel: StorageLevel): Unit = { - if (lookupCachedData(planToCache).nonEmpty) { + if (storageLevel == StorageLevel.NONE) { + // Do nothing for StorageLevel.NONE since it will not actually cache any data. 
+ } else if (lookupCachedData(planToCache).nonEmpty) { logWarning("Asked to cache already cached data.") } else { val sessionWithConfigsOff = getOrCloneSessionWithConfigsOff(spark) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index c967540541a5c..6d9c43f866a0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -45,6 +45,7 @@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ +import org.apache.spark.storage.StorageLevel case class TestDataPoint(x: Int, y: Double, s: String, t: TestDataPoint2) case class TestDataPoint2(x: Int, s: String) @@ -2535,6 +2536,11 @@ class DatasetSuite extends QueryTest checkDataset(ds.filter(f(col("_1"))), Tuple1(ValueClass(2))) } + + test("SPARK-45386: persist with StorageLevel.NONE should give correct count") { + val ds = Seq(1, 2).toDS().persist(StorageLevel.NONE) + assert(ds.count() == 2) + } } class DatasetLargeResultCollectingSuite extends QueryTest From d60a40e5d30e565536e0833fc650dc581439ee89 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 5 Oct 2023 13:00:02 +0900 Subject: [PATCH 041/521] [SPARK-45167][CONNECT][PYTHON][FOLLOW-UP][3.5] Use lighter threading Rlock ### What changes were proposed in this pull request? This PR reverts the revert: https://github.com/apache/spark/commit/522af69713502d33d34b330bce6166e3d15dba8a. It only partially ports the real change within main code. It excludes the testing side which depends on https://github.com/apache/spark/commit/9798244ca647ec68d36f4b9b21356a6de5f73f77 that does not exist in `branch-3.5`. ### Why are the changes needed? Mainly for code clean-up. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests should cover this. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43221 from HyukjinKwon/SPARK-45167-followup2. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/connect/client/reattach.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/pyspark/sql/connect/client/reattach.py b/python/pyspark/sql/connect/client/reattach.py index e58864b965bd9..6addb5bd2c652 100644 --- a/python/pyspark/sql/connect/client/reattach.py +++ b/python/pyspark/sql/connect/client/reattach.py @@ -18,12 +18,11 @@ check_dependencies(__name__) +from threading import RLock import warnings import uuid from collections.abc import Generator from typing import Optional, Dict, Any, Iterator, Iterable, Tuple, Callable, cast, Type, ClassVar -from multiprocessing import RLock -from multiprocessing.synchronize import RLock as RLockBase from multiprocessing.pool import ThreadPool import os @@ -56,7 +55,7 @@ class ExecutePlanResponseReattachableIterator(Generator): """ # Lock to manage the pool - _lock: ClassVar[RLockBase] = RLock() + _lock: ClassVar[RLock] = RLock() _release_thread_pool: Optional[ThreadPool] = ThreadPool(os.cpu_count() if os.cpu_count() else 8) @classmethod From c50e371c2d3ccba9340bc8980add0753f2d7a86b Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Mon, 2 Oct 2023 23:00:56 -0500 Subject: [PATCH 042/521] [SPARK-45250][CORE] Support stage level task resource profile for yarn cluster when dynamic allocation disabled ### What changes were proposed in this pull request? 
This PR is a follow-up of https://github.com/apache/spark/pull/37268 which supports stage level task resource profile for standalone cluster when dynamic allocation disabled. This PR enables stage-level task resource profile for yarn cluster. ### Why are the changes needed? Users who work on spark ML/DL cases running on Yarn would expect stage-level task resource profile feature. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? The current tests of https://github.com/apache/spark/pull/37268 can also cover this PR since both yarn and standalone cluster share the same TaskSchedulerImpl class which implements this feature. Apart from that, modifying the existing test to cover yarn cluster. Apart from that, I also performed some manual tests which have been updated in the comments. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43030 from wbo4958/yarn-task-resoure-profile. Authored-by: Bobby Wang Signed-off-by: Mridul Muralidharan gmail.com> (cherry picked from commit 5b80639e643b6dd09dd64c3f43ec039b2ef2f9fd) Signed-off-by: Thomas Graves --- .../spark/resource/ResourceProfileManager.scala | 6 +++--- .../resource/ResourceProfileManagerSuite.scala | 15 +++++++++++++-- docs/configuration.md | 2 +- docs/running-on-yarn.md | 6 +++++- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala b/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala index 9f98d4d9c9c79..cd7124a572464 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala @@ -67,9 +67,9 @@ private[spark] class ResourceProfileManager(sparkConf: SparkConf, */ private[spark] def isSupported(rp: ResourceProfile): Boolean = { if (rp.isInstanceOf[TaskResourceProfile] && !dynamicEnabled) { - if ((notRunningUnitTests || testExceptionThrown) && !isStandaloneOrLocalCluster) { - throw new SparkException("TaskResourceProfiles are only supported for Standalone " + - "cluster for now when dynamic allocation is disabled.") + if ((notRunningUnitTests || testExceptionThrown) && !(isStandaloneOrLocalCluster || isYarn)) { + throw new SparkException("TaskResourceProfiles are only supported for Standalone and " + + "Yarn cluster for now when dynamic allocation is disabled.") } } else { val isNotDefaultProfile = rp.id != ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala index e97d5c7883aa8..77dc7bcb4c56e 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala @@ -126,18 +126,29 @@ class ResourceProfileManagerSuite extends SparkFunSuite { val defaultProf = rpmanager.defaultResourceProfile assert(rpmanager.isSupported(defaultProf)) - // task resource profile. + // Standalone: supports task resource profile. val gpuTaskReq = new TaskResourceRequests().resource("gpu", 1) val taskProf = new TaskResourceProfile(gpuTaskReq.requests) assert(rpmanager.isSupported(taskProf)) + // Local: doesn't support task resource profile. 
conf.setMaster("local") rpmanager = new ResourceProfileManager(conf, listenerBus) val error = intercept[SparkException] { rpmanager.isSupported(taskProf) }.getMessage assert(error === "TaskResourceProfiles are only supported for Standalone " + - "cluster for now when dynamic allocation is disabled.") + "and Yarn cluster for now when dynamic allocation is disabled.") + + // Local cluster: supports task resource profile. + conf.setMaster("local-cluster[1, 1, 1024]") + rpmanager = new ResourceProfileManager(conf, listenerBus) + assert(rpmanager.isSupported(taskProf)) + + // Yarn: supports task resource profile. + conf.setMaster("yarn") + rpmanager = new ResourceProfileManager(conf, listenerBus) + assert(rpmanager.isSupported(taskProf)) } test("isSupported task resource profiles with dynamic allocation enabled") { diff --git a/docs/configuration.md b/docs/configuration.md index 1139beb66462f..74ddd6df0233c 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -3670,7 +3670,7 @@ See your cluster manager specific page for requirements and details on each of - # Stage Level Scheduling Overview The stage level scheduling feature allows users to specify task and executor resource requirements at the stage level. This allows for different stages to run with executors that have different resources. A prime example of this is one ETL stage runs with executors with just CPUs, the next stage is an ML stage that needs GPUs. Stage level scheduling allows for user to request different executors that have GPUs when the ML stage runs rather then having to acquire executors with GPUs at the start of the application and them be idle while the ETL stage is being run. -This is only available for the RDD API in Scala, Java, and Python. It is available on YARN, Kubernetes and Standalone when dynamic allocation is enabled. When dynamic allocation is disabled, it allows users to specify different task resource requirements at stage level, and this is supported on Standalone cluster right now. See the [YARN](running-on-yarn.html#stage-level-scheduling-overview) page or [Kubernetes](running-on-kubernetes.html#stage-level-scheduling-overview) page or [Standalone](spark-standalone.html#stage-level-scheduling-overview) page for more implementation details. +This is only available for the RDD API in Scala, Java, and Python. It is available on YARN, Kubernetes and Standalone when dynamic allocation is enabled. When dynamic allocation is disabled, it allows users to specify different task resource requirements at stage level, and this is supported on YARN and Standalone cluster right now. See the [YARN](running-on-yarn.html#stage-level-scheduling-overview) page or [Kubernetes](running-on-kubernetes.html#stage-level-scheduling-overview) page or [Standalone](spark-standalone.html#stage-level-scheduling-overview) page for more implementation details. See the `RDD.withResources` and `ResourceProfileBuilder` API's for using this feature. When dynamic allocation is disabled, tasks with different task resource requirements will share executors with `DEFAULT_RESOURCE_PROFILE`. While when dynamic allocation is enabled, the current implementation acquires new executors for each `ResourceProfile` created and currently has to be an exact match. Spark does not try to fit tasks into an executor that require a different ResourceProfile than the executor was created with. Executors that are not in use will idle timeout with the dynamic allocation logic. 
The default configuration for this feature is to only allow one ResourceProfile per stage. If the user associates more then 1 ResourceProfile to an RDD, Spark will throw an exception by default. See config `spark.scheduler.resource.profileMergeConflicts` to control that behavior. The current merge strategy Spark implements when `spark.scheduler.resource.profileMergeConflicts` is enabled is a simple max of each resource within the conflicting ResourceProfiles. Spark will create a new ResourceProfile with the max of each of the resources. diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 5eec6c490cb1f..97cc9ac135af1 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -759,7 +759,11 @@ YARN does not tell Spark the addresses of the resources allocated to each contai # Stage Level Scheduling Overview -Stage level scheduling is supported on YARN when dynamic allocation is enabled. One thing to note that is YARN specific is that each ResourceProfile requires a different container priority on YARN. The mapping is simply the ResourceProfile id becomes the priority, on YARN lower numbers are higher priority. This means that profiles created earlier will have a higher priority in YARN. Normally this won't matter as Spark finishes one stage before starting another one, the only case this might have an affect is in a job server type scenario, so its something to keep in mind. +Stage level scheduling is supported on YARN: +- When dynamic allocation is disabled: It allows users to specify different task resource requirements at the stage level and will use the same executors requested at startup. +- When dynamic allocation is enabled: It allows users to specify task and executor resource requirements at the stage level and will request the extra executors. + +One thing to note that is YARN specific is that each ResourceProfile requires a different container priority on YARN. The mapping is simply the ResourceProfile id becomes the priority, on YARN lower numbers are higher priority. This means that profiles created earlier will have a higher priority in YARN. Normally this won't matter as Spark finishes one stage before starting another one, the only case this might have an affect is in a job server type scenario, so its something to keep in mind. Note there is a difference in the way custom resources are handled between the base default profile and custom ResourceProfiles. To allow for the user to request YARN containers with extra resources without Spark scheduling on them, the user can specify resources via the spark.yarn.executor.resource. config. Those configs are only used in the base default profile though and do not get propagated into any other custom ResourceProfiles. This is because there would be no way to remove them if you wanted a stage to not have them. This results in your default profile getting custom resources defined in spark.yarn.executor.resource. plus spark defined resources of GPU or FPGA. Spark converts GPU and FPGA resources into the YARN built in types yarn.io/gpu) and yarn.io/fpga, but does not know the mapping of any other resources. Any other Spark custom resources are not propagated to YARN for the default profile. So if you want Spark to schedule based off a custom resource and have it requested from YARN, you must specify it in both YARN (spark.yarn.{driver/executor}.resource.) and Spark (spark.{driver/executor}.resource.) configs. 
Leave the Spark config off if you only want YARN containers with the extra resources but Spark not to schedule using them. Now for custom ResourceProfiles, it doesn't currently have a way to only specify YARN resources without Spark scheduling off of them. This means for custom ResourceProfiles we propagate all the resources defined in the ResourceProfile to YARN. We still convert GPU and FPGA to the YARN build in types as well. This requires that the name of any custom resources you specify match what they are defined as in YARN. # Important notes From 64e2b22f6b4023197871a60eb08b055688e9fdd2 Mon Sep 17 00:00:00 2001 From: Weichen Xu Date: Thu, 5 Oct 2023 08:38:54 +0900 Subject: [PATCH 043/521] [SPARK-45396][PYTHON] Add doc entry for `pyspark.ml.connect` module, and adds `Evaluator` to `__all__` at `ml.connect` This PR documents MLlib's Spark Connect support at API reference. This PR also piggies back a fix in `__all__` at `python/pyspark/ml/connect/__init__.py` so `from pyspark.sql.commect import Evaluator` works. With this this, user cannot see `pyspark.ml.connect` Python APIs on doc website. Yes it adds the new page into your facing documentation ([PySpark API reference](https://spark.apache.org/docs/latest/api/python/reference/index.html)). Manually tested via: ```bash cd python/docs make clean html ``` No. Closes #43210 from HyukjinKwon/SPARK-45396-followup. Lead-authored-by: Weichen Xu Co-authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 35b627a934b1ab28be7d6ba88fdad63dc129525a) Signed-off-by: Hyukjin Kwon --- python/docs/source/reference/index.rst | 1 + .../source/reference/pyspark.ml.connect.rst | 122 ++++++++++++++++++ python/pyspark/ml/connect/__init__.py | 3 +- 3 files changed, 125 insertions(+), 1 deletion(-) create mode 100644 python/docs/source/reference/pyspark.ml.connect.rst diff --git a/python/docs/source/reference/index.rst b/python/docs/source/reference/index.rst index ed3eb4d07dac6..6330636839cdf 100644 --- a/python/docs/source/reference/index.rst +++ b/python/docs/source/reference/index.rst @@ -31,6 +31,7 @@ Pandas API on Spark follows the API specifications of latest pandas release. pyspark.pandas/index pyspark.ss/index pyspark.ml + pyspark.ml.connect pyspark.streaming pyspark.mllib pyspark diff --git a/python/docs/source/reference/pyspark.ml.connect.rst b/python/docs/source/reference/pyspark.ml.connect.rst new file mode 100644 index 0000000000000..1a3e6a593980f --- /dev/null +++ b/python/docs/source/reference/pyspark.ml.connect.rst @@ -0,0 +1,122 @@ +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +MLlib (DataFrame-based) for Spark Connect +========================================= + +.. warning:: + The namespace for this package can change in the future Spark version. + + +Pipeline APIs +------------- + +.. 
currentmodule:: pyspark.ml.connect + +.. autosummary:: + :template: autosummary/class_with_docs.rst + :toctree: api/ + + Transformer + Estimator + Model + Evaluator + Pipeline + PipelineModel + + +Feature +------- + +.. currentmodule:: pyspark.ml.connect.feature + +.. autosummary:: + :template: autosummary/class_with_docs.rst + :toctree: api/ + + MaxAbsScaler + MaxAbsScalerModel + StandardScaler + StandardScalerModel + + +Classification +-------------- + +.. currentmodule:: pyspark.ml.connect.classification + +.. autosummary:: + :template: autosummary/class_with_docs.rst + :toctree: api/ + + LogisticRegression + LogisticRegressionModel + + +Functions +--------- + +.. currentmodule:: pyspark.ml.connect.functions + +.. autosummary:: + :toctree: api/ + + array_to_vector + vector_to_array + + +Tuning +------ + +.. currentmodule:: pyspark.ml.connect.tuning + +.. autosummary:: + :template: autosummary/class_with_docs.rst + :toctree: api/ + + CrossValidator + CrossValidatorModel + + +Evaluation +---------- + +.. currentmodule:: pyspark.ml.connect.evaluation + +.. autosummary:: + :template: autosummary/class_with_docs.rst + :toctree: api/ + + RegressionEvaluator + BinaryClassificationEvaluator + MulticlassClassificationEvaluator + + +Utilities +--------- + +.. currentmodule:: pyspark.ml.connect.io_utils + +.. autosummary:: + :template: autosummary/class_with_docs.rst + :toctree: api/ + + ParamsReadWrite + CoreModelReadWrite + MetaAlgorithmReadWrite + diff --git a/python/pyspark/ml/connect/__init__.py b/python/pyspark/ml/connect/__init__.py index 2ee152f6a38a3..e6115a62ccfe8 100644 --- a/python/pyspark/ml/connect/__init__.py +++ b/python/pyspark/ml/connect/__init__.py @@ -28,13 +28,14 @@ evaluation, tuning, ) +from pyspark.ml.connect.evaluation import Evaluator from pyspark.ml.connect.pipeline import Pipeline, PipelineModel __all__ = [ "Estimator", "Transformer", - "Estimator", + "Evaluator", "Model", "feature", "evaluation", From 1b5b2a15dec3d17fa1e7be9a78b37dc600eeb8ab Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Sat, 7 Oct 2023 15:26:17 -0700 Subject: [PATCH 044/521] [SPARK-45389][SQL][3.5] Correct MetaException matching rule on getting partition metadata This is the backport of https://github.com/apache/spark/pull/43191 for `branch-3.5`, it should also be applicable for `branch-3.3` and `branch-3.4` ### What changes were proposed in this pull request? This PR aims to fix the HMS call fallback logic introduced in SPARK-35437. ```patch try { ... hive.getPartitionNames ... hive.getPartitionsByNames } catch { - case ex: InvocationTargetException if ex.getCause.isInstanceOf[MetaException] => + case ex: HiveException if ex.getCause.isInstanceOf[MetaException] => ... } ``` ### Why are the changes needed? Directly method call won't throw `InvocationTargetException`, and check the code of `hive.getPartitionNames` and `hive.getPartitionsByNames`, both of them will wrap a `HiveException` if `MetaException` throws. ### Does this PR introduce _any_ user-facing change? Yes, it should be a bug fix. ### How was this patch tested? Pass GA and code review. (I'm not sure how to construct/simulate a MetaException during the HMS thrift call with the current HMS testing infrastructure) ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43260 from pan3793/SPARK-45389-3.5. 
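To make the wrapping argument above concrete, here is a minimal, self-contained Scala sketch. The exception classes and the `getPartitionsByNames` method are stand-ins defined locally for illustration, not the real Hive classes; the sketch only demonstrates why a direct call surfaces a `HiveException` whose cause is a `MetaException`, while an `InvocationTargetException` would only appear for a reflective call.

```scala
import java.lang.reflect.InvocationTargetException

// Stand-ins for Hive's exception types (illustrative only, not the real classes).
class MetaException(msg: String) extends Exception(msg)
class HiveException(cause: Throwable) extends Exception(cause)

object WrappingDemo {
  // Toy method that fails the way a metastore error is surfaced: MetaException wrapped in HiveException.
  def getPartitionsByNames(): Unit = throw new HiveException(new MetaException("boom"))

  def main(args: Array[String]): Unit = {
    // Direct call: the HiveException surfaces as-is, so the corrected guard matches.
    try {
      getPartitionsByNames()
    } catch {
      case ex: HiveException if ex.getCause.isInstanceOf[MetaException] =>
        println("direct call: HiveException wrapping MetaException")
    }

    // Reflective call: the same failure arrives wrapped in InvocationTargetException,
    // which is what the pre-fix match rule was written against.
    try {
      WrappingDemo.getClass.getMethod("getPartitionsByNames").invoke(WrappingDemo)
    } catch {
      case ex: InvocationTargetException =>
        println("reflective call: cause is " + ex.getCause.getClass.getSimpleName)
    }
  }
}
```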
Authored-by: Cheng Pan Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/sql/hive/client/HiveShim.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 63f672b22bad2..60ff9ec42f29d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -33,7 +33,7 @@ import org.apache.hadoop.hive.metastore.TableType import org.apache.hadoop.hive.metastore.api.{Database, EnvironmentContext, Function => HiveFunction, FunctionType, Index, MetaException, PrincipalType, ResourceType, ResourceUri} import org.apache.hadoop.hive.ql.Driver import org.apache.hadoop.hive.ql.io.AcidUtils -import org.apache.hadoop.hive.ql.metadata.{Hive, Partition, Table} +import org.apache.hadoop.hive.ql.metadata.{Hive, HiveException, Partition, Table} import org.apache.hadoop.hive.ql.plan.AddPartitionDesc import org.apache.hadoop.hive.ql.processors.{CommandProcessor, CommandProcessorFactory} import org.apache.hadoop.hive.ql.session.SessionState @@ -1190,7 +1190,7 @@ private[client] class Shim_v0_13 extends Shim_v0_12 { recordHiveCall() hive.getPartitionsByNames(table, partNames.asJava) } catch { - case ex: InvocationTargetException if ex.getCause.isInstanceOf[MetaException] => + case ex: HiveException if ex.getCause.isInstanceOf[MetaException] => logWarning("Caught Hive MetaException attempting to get partition metadata by " + "filter from client side. Falling back to fetching all partition metadata", ex) recordHiveCall() From 8105e0c9f93bd4c93d364646766d5ee5e057d19a Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 8 Oct 2023 04:44:55 -0700 Subject: [PATCH 045/521] [MINOR][DOCS] Update `CTAS` with `LOCATION` behavior with Spark 3.2+ ### What changes were proposed in this pull request? This PR aims to update `CTAS` with `LOCATION` behavior according to Spark 3.2+. ### Why are the changes needed? SPARK-28551 changed the behavior at Apache Spark 3.2.0. https://github.com/apache/spark/blob/24b82dfd6cfb9a658af615446be5423695830dd9/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala#L2306-L2313 ### Does this PR introduce _any_ user-facing change? No. This is a documentation fix. ### How was this patch tested? N/A ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43277 from dongjoon-hyun/minor_ctas. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 2d6d09b71e77b362a4c774170e2ca992a31fb1ea) Signed-off-by: Dongjoon Hyun --- docs/sql-ref-syntax-ddl-create-table-datasource.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/docs/sql-ref-syntax-ddl-create-table-datasource.md b/docs/sql-ref-syntax-ddl-create-table-datasource.md index 7920a8a558e3d..f645732a15df9 100644 --- a/docs/sql-ref-syntax-ddl-create-table-datasource.md +++ b/docs/sql-ref-syntax-ddl-create-table-datasource.md @@ -104,7 +104,9 @@ In general CREATE TABLE is creating a "pointer", and you need to make sure it po existing. An exception is file source such as parquet, json. If you don't specify the LOCATION, Spark will create a default table location for you. 
-For CREATE TABLE AS SELECT, Spark will overwrite the underlying data source with the data of the +For CREATE TABLE AS SELECT with LOCATION, Spark throws analysis exceptions if the given location +exists as a non-empty directory. If `spark.sql.legacy.allowNonEmptyLocationInCTAS` is set to true, +Spark overwrites the underlying data source with the data of the input query, to make sure the table gets created contains exactly the same data as the input query. ### Examples From 5f8ae9a3dbd2c7624bffd588483c9916c302c081 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Mon, 9 Oct 2023 12:30:20 +0300 Subject: [PATCH 046/521] [SPARK-45424][SQL] Fix TimestampFormatter return optional parse results when only prefix match ### What changes were proposed in this pull request? When use custom pattern to parse timestamp, if there have matched prefix, not matched all. The `Iso8601TimestampFormatter::parseOptional` and `Iso8601TimestampFormatter::parseWithoutTimeZoneOptional` should not return not empty result. eg: pattern = `yyyy-MM-dd HH:mm:ss`, value = `9999-12-31 23:59:59.999`. If fact, `yyyy-MM-dd HH:mm:ss` can parse `9999-12-31 23:59:59` normally, but value have suffix `.999`. so we can't return not empty result. This bug will affect inference the schema in CSV/JSON. ### Why are the changes needed? Fix inference the schema bug. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? add new test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43245 from Hisoka-X/SPARK-45424-inference-schema-unresolved. Authored-by: Jia Fan Signed-off-by: Max Gekk (cherry picked from commit 4493b431192fcdbab1379b7ffb89eea0cdaa19f1) Signed-off-by: Max Gekk --- .../spark/sql/catalyst/util/TimestampFormatter.scala | 10 ++++++---- .../sql/catalyst/util/TimestampFormatterSuite.scala | 10 ++++++++++ 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 8a288d0e9f3a3..55eee41c14ca5 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -167,8 +167,9 @@ class Iso8601TimestampFormatter( override def parseOptional(s: String): Option[Long] = { try { - val parsed = formatter.parseUnresolved(s, new ParsePosition(0)) - if (parsed != null) { + val parsePosition = new ParsePosition(0) + val parsed = formatter.parseUnresolved(s, parsePosition) + if (parsed != null && s.length == parsePosition.getIndex) { Some(extractMicros(parsed)) } else { None @@ -196,8 +197,9 @@ class Iso8601TimestampFormatter( override def parseWithoutTimeZoneOptional(s: String, allowTimeZone: Boolean): Option[Long] = { try { - val parsed = formatter.parseUnresolved(s, new ParsePosition(0)) - if (parsed != null) { + val parsePosition = new ParsePosition(0) + val parsed = formatter.parseUnresolved(s, parsePosition) + if (parsed != null && s.length == parsePosition.getIndex) { Some(extractMicrosNTZ(s, parsed, allowTimeZone)) } else { None diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index eb173bc7f8c87..2134a0d6ecd36 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -507,4 +507,14 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { assert(simpleFormatter.parseOptional("abc").isEmpty) } + + test("SPARK-45424: do not return optional parse results when only prefix match") { + val formatter = new Iso8601TimestampFormatter( + "yyyy-MM-dd HH:mm:ss", + locale = DateFormatter.defaultLocale, + legacyFormat = LegacyDateFormats.SIMPLE_DATE_FORMAT, + isParsing = true, zoneId = DateTimeTestUtils.LA) + assert(formatter.parseOptional("9999-12-31 23:59:59.999").isEmpty) + assert(formatter.parseWithoutTimeZoneOptional("9999-12-31 23:59:59.999", true).isEmpty) + } } From 4841a404be3c37fc16031a0119b321eefcb2faab Mon Sep 17 00:00:00 2001 From: panbingkun Date: Mon, 9 Oct 2023 12:32:14 +0300 Subject: [PATCH 047/521] [SPARK-45459][SQL][TESTS][DOCS] Remove the last 2 extra spaces in the automatically generated `sql-error-conditions.md` file ### What changes were proposed in this pull request? The pr aims to remove the last 2 extra spaces in the automatically generated `sql-error-conditions.md` file. ### Why are the changes needed? - When I am work on another PR, I use the following command: ``` SPARK_GENERATE_GOLDEN_FILES=1 build/sbt \ "core/testOnly *SparkThrowableSuite -- -t \"Error classes match with document\"" ``` I found that in the automatically generated `sql-error-conditions.md` file, there are 2 extra spaces added at the end, Obviously, this is not what we expected, otherwise we would need to manually remove it, which is not in line with automation. - The git tells us this difference, as follows: image ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Pass GA. - Manually test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43274 from panbingkun/SPARK-45459. Authored-by: panbingkun Signed-off-by: Max Gekk (cherry picked from commit af800b505956ff26e03c5fc56b6cb4ac5c0efe2f) Signed-off-by: Max Gekk --- .../test/scala/org/apache/spark/SparkThrowableSuite.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala index 0249cde54884b..299bcea3f9e23 100644 --- a/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkThrowableSuite.scala @@ -253,8 +253,7 @@ class SparkThrowableSuite extends SparkFunSuite { | |Also see [SQLSTATE Codes](sql-error-conditions-sqlstates.html). | - |$sqlErrorParentDocContent - |""".stripMargin + |$sqlErrorParentDocContent""".stripMargin errors.filter(_._2.subClass.isDefined).foreach(error => { val name = error._1 @@ -316,7 +315,7 @@ class SparkThrowableSuite extends SparkFunSuite { } FileUtils.writeStringToFile( parentDocPath.toFile, - sqlErrorParentDoc + lineSeparator, + sqlErrorParentDoc, StandardCharsets.UTF_8) } } else { From 8bf5a5bca3f9f7db78182d14e56476d384f442fa Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Mon, 9 Oct 2023 22:15:45 +0300 Subject: [PATCH 048/521] [SPARK-45383][SQL] Fix error message for time travel with non-existing table ### What changes were proposed in this pull request? Fixes a small bug to report `TABLE_OR_VIEW_NOT_FOUND` error correctly for time travel. It was missed before because `RelationTimeTravel` is a leaf node but it may contain `UnresolvedRelation`. ### Why are the changes needed? 
bug fix ### Does this PR introduce _any_ user-facing change? Yes, the error message becomes reasonable ### How was this patch tested? new tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #43298 from cloud-fan/time-travel. Authored-by: Wenchen Fan Signed-off-by: Max Gekk (cherry picked from commit ced321c8b5a32c69dfb2841d4bec8a03f21b8038) Signed-off-by: Max Gekk --- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 4 ++++ .../spark/sql/connector/DataSourceV2SQLSuite.scala | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 511f3622e7e35..533ea8a2b7998 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -365,6 +365,9 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB }) operator match { + case RelationTimeTravel(u: UnresolvedRelation, _, _) => + u.tableNotFound(u.multipartIdentifier) + case etw: EventTimeWatermark => etw.eventTime.dataType match { case s: StructType @@ -377,6 +380,7 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB "eventName" -> toSQLId(etw.eventTime.name), "eventType" -> toSQLType(etw.eventTime.dataType))) } + case f: Filter if f.condition.dataType != BooleanType => f.failAnalysis( errorClass = "DATATYPE_MISMATCH.FILTER_NOT_BOOLEAN", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index 06f5600e0d199..7745e9c0a4ee7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -3014,6 +3014,17 @@ class DataSourceV2SQLSuiteV1Filter sqlState = None, parameters = Map("relationId" -> "`x`")) + checkError( + exception = intercept[AnalysisException] { + sql("SELECT * FROM non_exist VERSION AS OF 1") + }, + errorClass = "TABLE_OR_VIEW_NOT_FOUND", + parameters = Map("relationName" -> "`non_exist`"), + context = ExpectedContext( + fragment = "non_exist", + start = 14, + stop = 22)) + val subquery1 = "SELECT 1 FROM non_exist" checkError( exception = intercept[AnalysisException] { From ac4b9154b5822779023e66f2efb24d05e20b1cca Mon Sep 17 00:00:00 2001 From: Chaoqin Li Date: Tue, 10 Oct 2023 11:03:19 +0900 Subject: [PATCH 049/521] [SPARK-45419][SS] Avoid reusing rocksdb sst files in a dfferent rocksdb instance ### What changes were proposed in this pull request? When loading a rocksdb instance, remove file version map entry of larger versions to avoid rocksdb sst file unique id mismatch exception. The SST files in larger versions can't be reused even if they have the same size and name because they belong to another rocksdb instance. ### Why are the changes needed? Avoid rocksdb file mismatch exception that may occur in runtime. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add rocksdb unit test. Closes #43174 from chaoqin-li1123/rocksdb_mismatch. 
Authored-by: Chaoqin Li Signed-off-by: Jungtaek Lim --- .../streaming/state/RocksDBFileManager.scala | 4 +++ .../streaming/state/RocksDBSuite.scala | 29 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala index 0891d7737135a..faf9cd701aeca 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala @@ -207,6 +207,10 @@ class RocksDBFileManager( */ def loadCheckpointFromDfs(version: Long, localDir: File): RocksDBCheckpointMetadata = { logInfo(s"Loading checkpoint files for version $version") + // The unique ids of SST files are checked when opening a rocksdb instance. The SST files + // in larger versions can't be reused even if they have the same size and name because + // they belong to another rocksdb instance. + versionToRocksDBFiles.keySet().removeIf(_ >= version) val metadata = if (version == 0) { if (localDir.exists) Utils.deleteRecursively(localDir) localDir.mkdirs() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala index e31b05c362f6a..91dd858220717 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala @@ -214,6 +214,35 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } + testWithChangelogCheckpointingEnabled("SPARK-45419: Do not reuse SST files" + + " in different RocksDB instances") { + val remoteDir = Utils.createTempDir().toString + val conf = dbConf.copy(minDeltasForSnapshot = 0, compactOnCommit = false) + new File(remoteDir).delete() // to make sure that the directory gets created + withDB(remoteDir, conf = conf) { db => + for (version <- 0 to 2) { + db.load(version) + db.put(version.toString, version.toString) + db.commit() + } + // upload snapshot 3.zip + db.doMaintenance() + // Roll back to version 1 and start to process data. + for (version <- 1 to 3) { + db.load(version) + db.put(version.toString, version.toString) + db.commit() + } + // Upload snapshot 4.zip, should not reuse the SST files in 3.zip + db.doMaintenance() + } + + withDB(remoteDir, conf = conf) { db => + // Open the db to verify that the state in 4.zip is no corrupted. + db.load(4) + } + } + // A rocksdb instance with changelog checkpointing enabled should be able to load // an existing checkpoint without changelog. testWithChangelogCheckpointingEnabled( From 24f88b319c88bfe55e8b2b683193a85842bdad88 Mon Sep 17 00:00:00 2001 From: yorksity Date: Tue, 10 Oct 2023 14:36:23 +0800 Subject: [PATCH 050/521] [SPARK-45205][SQL] CommandResultExec to override iterator methods to avoid triggering multiple jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? After SPARK-35378 was changed, the execution of statements such as ‘show parititions test' became slower. 
The change point is that the execution process changes from ExecutedCommandEnec to CommandResultExec, but ExecutedCommandExec originally implemented the following method override def executeToIterator(): Iterator[InternalRow] = sideEffectResult.iterator CommandResultExec is not rewritten, so when the hasNext method is executed, a job process is created, resulting in increased time-consuming ### Why are the changes needed? Improve performance when show partitions/tables. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests should cover this. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43270 from yorksity/SPARK-45205. Authored-by: yorksity Signed-off-by: Wenchen Fan (cherry picked from commit c9c99222e828d556552694dfb48c75bf0703a2c4) Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/execution/CommandResultExec.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CommandResultExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CommandResultExec.scala index 5f38278d2dc67..45e3e41ab053d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CommandResultExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CommandResultExec.scala @@ -81,6 +81,8 @@ case class CommandResultExec( unsafeRows } + override def executeToIterator(): Iterator[InternalRow] = unsafeRows.iterator + override def executeTake(limit: Int): Array[InternalRow] = { val taken = unsafeRows.take(limit) longMetric("numOutputRows").add(taken.size) From 81a7f8f184cd597208fcad72130354288a0c9f79 Mon Sep 17 00:00:00 2001 From: liangyongyuan Date: Tue, 10 Oct 2023 14:40:33 +0800 Subject: [PATCH 051/521] [SPARK-45449][SQL] Cache Invalidation Issue with JDBC Table ### What changes were proposed in this pull request? Add an equals method to `JDBCOptions` that considers two instances equal if their `JDBCOptions.parameters` are the same. ### Why are the changes needed? We have identified a cache invalidation issue when caching JDBC tables in Spark SQL. The cached table is unexpectedly invalidated when queried, leading to a re-read from the JDBC table instead of retrieving data from the cache. Example SQL: ``` CACHE TABLE cache_t SELECT * FROM mysql.test.test1; SELECT * FROM cache_t; ``` Expected Behavior: The expectation is that querying the cached table (cache_t) should retrieve the result from the cache without re-evaluating the execution plan. Actual Behavior: However, the cache is invalidated, and the content is re-read from the JDBC table. Root Cause: The issue lies in the `CacheData` class, where the comparison involves `JDBCTable`. The `JDBCTable` is a case class: `case class JDBCTable(ident: Identifier, schema: StructType, jdbcOptions: JDBCOptions)` The comparison of non-case class components, such as `jdbcOptions`, involves pointer comparison. This leads to unnecessary cache invalidation. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add uts ### Was this patch authored or co-authored using generative AI tooling? No Closes #43258 from lyy-pineapple/spark-git-cache. 
Authored-by: liangyongyuan Signed-off-by: Wenchen Fan (cherry picked from commit d073f2d3e2f67a4b612e020a583e23dc1fa63aab) Signed-off-by: Wenchen Fan --- .../execution/datasources/jdbc/JDBCOptions.scala | 8 ++++++++ .../v2/jdbc/JDBCTableCatalogSuite.scala | 15 +++++++++++++++ 2 files changed, 23 insertions(+) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala index 268a65b81ff68..57651684070f7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCOptions.scala @@ -239,6 +239,14 @@ class JDBCOptions( .get(JDBC_PREFER_TIMESTAMP_NTZ) .map(_.toBoolean) .getOrElse(SQLConf.get.timestampType == TimestampNTZType) + + override def hashCode: Int = this.parameters.hashCode() + + override def equals(other: Any): Boolean = other match { + case otherOption: JDBCOptions => + otherOption.parameters.equals(this.parameters) + case _ => false + } } class JdbcOptionsInWrite( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala index 6b85911dca773..eed64b873c451 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.analysis.{NoSuchNamespaceException, TableAlreadyExistsException} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.util.CharVarcharUtils +import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ @@ -512,4 +513,18 @@ class JDBCTableCatalogSuite extends QueryTest with SharedSparkSession { assert(t.schema === replaced) } } + + test("SPARK-45449: Cache Invalidation Issue with JDBC Table") { + withTable("h2.test.cache_t") { + withConnection { conn => + conn.prepareStatement( + """CREATE TABLE "test"."cache_t" (id decimal(25) PRIMARY KEY NOT NULL, + |name TEXT(32) NOT NULL)""".stripMargin).executeUpdate() + } + sql("INSERT OVERWRITE h2.test.cache_t SELECT 1 AS id, 'a' AS name") + sql("CACHE TABLE t1 SELECT id, name FROM h2.test.cache_t") + val plan = sql("select * from t1").queryExecution.sparkPlan + assert(plan.isInstanceOf[InMemoryTableScanExec]) + } + } } From 40d44a3fa4a59f6e98d076a3b3f006851f25be3a Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 10 Oct 2023 17:16:11 +0900 Subject: [PATCH 052/521] [SPARK-45475][SQL] Uses DataFrame.foreachPartition instead of RDD.foreachPartition in JdbcUtils This PR is kind of a followup for https://github.com/apache/spark/pull/39976 that addresses https://github.com/apache/spark/pull/39976#issuecomment-1752930380 comment. In order to probably assign the SQL execution ID so `df.observe` works with this. Yes. `df.observe` will work with JDBC connectors. Manually tested. Unit test was added. Closes #43304 from HyukjinKwon/foreachbatch. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 39cc4abaff73cb49f9d79d1d844fe5c9fa14c917) Signed-off-by: Hyukjin Kwon --- .../execution/datasources/jdbc/JdbcUtils.scala | 2 +- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index b7019c1dcbe53..aae4be3f09483 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -898,7 +898,7 @@ object JdbcUtils extends Logging with SQLConfHelper { case Some(n) if n < df.rdd.getNumPartitions => df.coalesce(n) case _ => df } - repartitionedDF.rdd.foreachPartition { iterator => savePartition( + repartitionedDF.foreachPartition { iterator => savePartition( table, iterator, rddSchema, insertStmt, batchSize, dialect, isolationLevel, options) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 93b6652d516cc..eae171e20b706 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -29,7 +29,7 @@ import org.mockito.ArgumentMatchers._ import org.mockito.Mockito._ import org.apache.spark.{SparkException, SparkSQLException} -import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, DataFrame, Observation, QueryTest, Row} import org.apache.spark.sql.catalyst.{analysis, TableIdentifier} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.plans.logical.ShowCreateTable @@ -39,6 +39,7 @@ import org.apache.spark.sql.execution.command.{ExplainCommand, ShowCreateTableCo import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCPartition, JDBCRelation, JdbcUtils} import org.apache.spark.sql.execution.metric.InputOutputMetricsHelper +import org.apache.spark.sql.functions.{lit, percentile_approx} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSparkSession @@ -2057,4 +2058,18 @@ class JDBCSuite extends QueryTest with SharedSparkSession { val df = sql("SELECT * FROM composite_name WHERE `last name` = 'smith'") assert(df.collect.toSet === Set(Row("smith", 1))) } + + test("SPARK-45475: saving a table via JDBC should work with observe API") { + val tableName = "test_table" + val namedObservation = Observation("named") + val observed_df = spark.range(100).observe( + namedObservation, percentile_approx($"id", lit(0.5), lit(100)).as("percentile_approx_val")) + + observed_df.write.format("jdbc") + .option("url", urlWithUserAndPass) + .option("dbtable", tableName).save() + + val expected = Map("percentile_approx_val" -> 49) + assert(namedObservation.get === expected) + } } From 80c166092eb691586eeaf7adb43f818f50c6cdea Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 11 Oct 2023 01:03:27 +0900 Subject: [PATCH 053/521] Revert "[SPARK-45475][SQL] Uses DataFrame.foreachPartition instead of RDD.foreachPartition in JdbcUtils" This reverts commit 40d44a3fa4a59f6e98d076a3b3f006851f25be3a. 
--- .../execution/datasources/jdbc/JdbcUtils.scala | 2 +- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 17 +---------------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index aae4be3f09483..b7019c1dcbe53 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -898,7 +898,7 @@ object JdbcUtils extends Logging with SQLConfHelper { case Some(n) if n < df.rdd.getNumPartitions => df.coalesce(n) case _ => df } - repartitionedDF.foreachPartition { iterator => savePartition( + repartitionedDF.rdd.foreachPartition { iterator => savePartition( table, iterator, rddSchema, insertStmt, batchSize, dialect, isolationLevel, options) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index eae171e20b706..93b6652d516cc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -29,7 +29,7 @@ import org.mockito.ArgumentMatchers._ import org.mockito.Mockito._ import org.apache.spark.{SparkException, SparkSQLException} -import org.apache.spark.sql.{AnalysisException, DataFrame, Observation, QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} import org.apache.spark.sql.catalyst.{analysis, TableIdentifier} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.plans.logical.ShowCreateTable @@ -39,7 +39,6 @@ import org.apache.spark.sql.execution.command.{ExplainCommand, ShowCreateTableCo import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCPartition, JDBCRelation, JdbcUtils} import org.apache.spark.sql.execution.metric.InputOutputMetricsHelper -import org.apache.spark.sql.functions.{lit, percentile_approx} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSparkSession @@ -2058,18 +2057,4 @@ class JDBCSuite extends QueryTest with SharedSparkSession { val df = sql("SELECT * FROM composite_name WHERE `last name` = 'smith'") assert(df.collect.toSet === Set(Row("smith", 1))) } - - test("SPARK-45475: saving a table via JDBC should work with observe API") { - val tableName = "test_table" - val namedObservation = Observation("named") - val observed_df = spark.range(100).observe( - namedObservation, percentile_approx($"id", lit(0.5), lit(100)).as("percentile_approx_val")) - - observed_df.write.format("jdbc") - .option("url", urlWithUserAndPass) - .option("dbtable", tableName).save() - - val expected = Map("percentile_approx_val" -> 49) - assert(namedObservation.get === expected) - } } From 22e924778eef9a36c360e3e2a479c9401e6c58a1 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 10 Oct 2023 19:49:20 -0700 Subject: [PATCH 054/521] [SPARK-45473][SQL][3.5] Fix incorrect error message for RoundBase ### What changes were proposed in this pull request? This minor patch fixes incorrect error message of `RoundBase`. ### Why are the changes needed? Fix incorrect error message. ### Does this PR introduce _any_ user-facing change? 
No ### How was this patch tested? Unit test ### Was this patch authored or co-authored using generative AI tooling? No Closes #43315 from viirya/minor_fix-3.5. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/expressions/mathExpressions.scala | 2 +- .../sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index add59a38b7201..b9a2cb348e380 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -1509,7 +1509,7 @@ abstract class RoundBase(child: Expression, scale: Expression, DataTypeMismatch( errorSubClass = "NON_FOLDABLE_INPUT", messageParameters = Map( - "inputName" -> "scala", + "inputName" -> "scale", "inputType" -> toSQLType(scale.dataType), "inputExpr" -> toSQLExpr(scale))) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala index 665204cd0c58e..08be4c8acc4b5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ExpressionTypeCheckingSuite.scala @@ -602,7 +602,7 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite with SQLHelper with Quer errorClass = "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", parameters = Map( "sqlExpr" -> "\"round(intField, intField)\"", - "inputName" -> "scala", + "inputName" -> "scale", "inputType" -> "\"INT\"", "inputExpr" -> "\"intField\"")) @@ -649,7 +649,7 @@ class ExpressionTypeCheckingSuite extends SparkFunSuite with SQLHelper with Quer errorClass = "DATATYPE_MISMATCH.NON_FOLDABLE_INPUT", parameters = Map( "sqlExpr" -> "\"bround(intField, intField)\"", - "inputName" -> "scala", + "inputName" -> "scale", "inputType" -> "\"INT\"", "inputExpr" -> "\"intField\"")) checkError( From 04e6b713ab5e4a607254f331a0ea1e331ae6c857 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 11 Oct 2023 14:15:40 +0900 Subject: [PATCH 055/521] [SPARK-45475][SQL][3.5] Uses DataFrame.foreachPartition instead of RDD.foreachPartition in JdbcUtils This PR cherry-picks https://github.com/apache/spark/pull/43304 to branch-3.5 --- ### What changes were proposed in this pull request? This PR is kind of a followup for https://github.com/apache/spark/pull/39976 that addresses https://github.com/apache/spark/pull/39976#issuecomment-1752930380 comment. ### Why are the changes needed? In order to probably assign the SQL execution ID so `df.observe` works with this. ### Does this PR introduce _any_ user-facing change? Yes. `df.observe` will work with JDBC connectors. ### How was this patch tested? Manually tested. ### Was this patch authored or co-authored using generative AI tooling? Unit test was added. Closes #43322 from HyukjinKwon/SPARK-45475-3.5. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- .../execution/datasources/jdbc/JdbcUtils.scala | 2 +- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index b7019c1dcbe53..6e7298710a5d8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -898,7 +898,7 @@ object JdbcUtils extends Logging with SQLConfHelper { case Some(n) if n < df.rdd.getNumPartitions => df.coalesce(n) case _ => df } - repartitionedDF.rdd.foreachPartition { iterator => savePartition( + repartitionedDF.foreachPartition { iterator: Iterator[Row] => savePartition( table, iterator, rddSchema, insertStmt, batchSize, dialect, isolationLevel, options) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 93b6652d516cc..eae171e20b706 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -29,7 +29,7 @@ import org.mockito.ArgumentMatchers._ import org.mockito.Mockito._ import org.apache.spark.{SparkException, SparkSQLException} -import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, DataFrame, Observation, QueryTest, Row} import org.apache.spark.sql.catalyst.{analysis, TableIdentifier} import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.plans.logical.ShowCreateTable @@ -39,6 +39,7 @@ import org.apache.spark.sql.execution.command.{ExplainCommand, ShowCreateTableCo import org.apache.spark.sql.execution.datasources.LogicalRelation import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JDBCPartition, JDBCRelation, JdbcUtils} import org.apache.spark.sql.execution.metric.InputOutputMetricsHelper +import org.apache.spark.sql.functions.{lit, percentile_approx} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.sources._ import org.apache.spark.sql.test.SharedSparkSession @@ -2057,4 +2058,18 @@ class JDBCSuite extends QueryTest with SharedSparkSession { val df = sql("SELECT * FROM composite_name WHERE `last name` = 'smith'") assert(df.collect.toSet === Set(Row("smith", 1))) } + + test("SPARK-45475: saving a table via JDBC should work with observe API") { + val tableName = "test_table" + val namedObservation = Observation("named") + val observed_df = spark.range(100).observe( + namedObservation, percentile_approx($"id", lit(0.5), lit(100)).as("percentile_approx_val")) + + observed_df.write.format("jdbc") + .option("url", urlWithUserAndPass) + .option("dbtable", tableName).save() + + val expected = Map("percentile_approx_val" -> 49) + assert(namedObservation.get === expected) + } } From 7e3ddc1e582a6e4fa96bab608c4c2bbc2c93b449 Mon Sep 17 00:00:00 2001 From: Jia Fan Date: Wed, 11 Oct 2023 19:33:23 +0300 Subject: [PATCH 056/521] [SPARK-45433][SQL] Fix CSV/JSON schema inference when timestamps do not match specified timestampFormat ### What changes were proposed in this pull request? 
This PR fixes CSV/JSON schema inference, which reported an error when timestamps did not match the specified timestampFormat. ```scala //eg val csv = spark.read.option("timestampFormat", "yyyy-MM-dd'T'HH:mm:ss") .option("inferSchema", true).csv(Seq("2884-06-24T02:45:51.138").toDS()) csv.show() //error Caused by: java.time.format.DateTimeParseException: Text '2884-06-24T02:45:51.138' could not be parsed, unparsed text found at index 19 ``` This bug only happened when the partition had one row. The data type should be `StringType`, not `TimestampType`, because the value does not match `timestampFormat`. Taking CSV as an example: in `CSVInferSchema::tryParseTimestampNTZ`, inference first uses `timestampNTZFormatter.parseWithoutTimeZoneOptional` and returns `TimestampType`. If the same partition had another row, it would then use `tryParseTimestamp` to parse that row with the user-defined `timestampFormat`, find that it cannot be converted to a timestamp with `timestampFormat`, and finally return `StringType`. But with only one row, using `timestampNTZFormatter.parseWithoutTimeZoneOptional` to parse a regular timestamp is not right. We should only parse with it when `spark.sql.timestampType` is `TIMESTAMP_NTZ`. If `spark.sql.timestampType` is `TIMESTAMP_LTZ`, we should parse directly with `tryParseTimestamp`, to avoid returning `TimestampType` when timestamps do not match the specified timestampFormat. ### Why are the changes needed? Fix a schema inference bug. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a new test. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43243 from Hisoka-X/SPARK-45433-inference-mismatch-timestamp-one-row. Authored-by: Jia Fan Signed-off-by: Max Gekk (cherry picked from commit eae5c0e1efce83c2bb08754784db070be285285a) Signed-off-by: Max Gekk --- .../apache/spark/sql/catalyst/csv/CSVInferSchema.scala | 9 ++++++--- .../spark/sql/catalyst/json/JsonInferSchema.scala | 8 +++++--- .../spark/sql/catalyst/csv/CSVInferSchemaSuite.scala | 10 ++++++++++ .../spark/sql/catalyst/json/JsonInferSchemaSuite.scala | 8 ++++++++ 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index 51586a0065e95..ec01b56f9eb7c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.ExprUtils import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter} import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT import org.apache.spark.sql.errors.QueryExecutionErrors -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.types._ class CSVInferSchema(val options: CSVOptions) extends Serializable { @@ -202,8 +202,11 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { // We can only parse the value as TimestampNTZType if it does not have zone-offset or // time-zone component and can be parsed with the timestamp formatter. // Otherwise, it is likely to be a timestamp with timezone.
- if (timestampNTZFormatter.parseWithoutTimeZoneOptional(field, false).isDefined) { - SQLConf.get.timestampType + val timestampType = SQLConf.get.timestampType + if ((SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY || + timestampType == TimestampNTZType) && + timestampNTZFormatter.parseWithoutTimeZoneOptional(field, false).isDefined) { + timestampType } else { tryParseTimestamp(field) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index 5385afe8c9353..4123c5290b6a1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.json.JacksonUtils.nextUntil import org.apache.spark.sql.catalyst.util._ import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT import org.apache.spark.sql.errors.QueryExecutionErrors -import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -148,11 +148,13 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { val bigDecimal = decimalParser(field) DecimalType(bigDecimal.precision, bigDecimal.scale) } + val timestampType = SQLConf.get.timestampType if (options.prefersDecimal && decimalTry.isDefined) { decimalTry.get - } else if (options.inferTimestamp && + } else if (options.inferTimestamp && (SQLConf.get.legacyTimeParserPolicy == + LegacyBehaviorPolicy.LEGACY || timestampType == TimestampNTZType) && timestampNTZFormatter.parseWithoutTimeZoneOptional(field, false).isDefined) { - SQLConf.get.timestampType + timestampType } else if (options.inferTimestamp && timestampFormatter.parseOptional(field).isDefined) { TimestampType diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala index acedf7998c2d5..fb91200557a65 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchemaSuite.scala @@ -263,4 +263,14 @@ class CSVInferSchemaSuite extends SparkFunSuite with SQLHelper { inferSchema = new CSVInferSchema(options) assert(inferSchema.inferField(DateType, "2012_12_12") == DateType) } + + test("SPARK-45433: inferring the schema when timestamps do not match specified timestampFormat" + + " with only one row") { + val options = new CSVOptions( + Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss"), + columnPruning = false, + defaultTimeZoneId = "UTC") + val inferSchema = new CSVInferSchema(options) + assert(inferSchema.inferField(NullType, "2884-06-24T02:45:51.138") == StringType) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala index 8290b38e33934..81a4858dce82a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala @@ -112,4 +112,12 @@ class JsonInferSchemaSuite extends SparkFunSuite with SQLHelper { 
checkType(Map("inferTimestamp" -> "true"), json, TimestampType) checkType(Map("inferTimestamp" -> "false"), json, StringType) } + + test("SPARK-45433: inferring the schema when timestamps do not match specified timestampFormat" + + " with only one row") { + checkType( + Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss", "inferTimestamp" -> "true"), + """{"a": "2884-06-24T02:45:51.138"}""", + StringType) + } } From 249533bcc8c7fa7f578961ce21d4d7118565dfc1 Mon Sep 17 00:00:00 2001 From: srielau Date: Thu, 12 Oct 2023 21:34:49 +0800 Subject: [PATCH 057/521] [SPARK-45132][SQL] Fix IDENTIFIER for function invocation ### What changes were proposed in this pull request? Due to a quirk in the parser, in some cases, IDENTIFIER()() is not properly recognized as a function invocation. The change is to remove the explicit IDENTIFIER-clause rule in the function invocation grammar and instead recognize IDENTIFIER() within visitFunctionCall. ### Why are the changes needed? Function invocation support for IDENTIFIER is incomplete otherwise ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added new testcases to identifier-clause.sql ### Was this patch authored or co-authored using generative AI tooling? No Closes #42888 from srielau/SPARK-45132. Lead-authored-by: srielau Co-authored-by: Wenchen Fan Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit f0b2e6da52113802f64f7879f207064d3bdbc7b0) Signed-off-by: Wenchen Fan --- .../sql/catalyst/parser/SqlBaseParser.g4 | 2 +- .../sql/catalyst/parser/AstBuilder.scala | 43 +++++++++++-------- .../identifier-clause.sql.out | 28 ++++++++++-- .../sql-tests/inputs/identifier-clause.sql | 3 +- .../results/identifier-clause.sql.out | 27 +++++++++++- 5 files changed, 77 insertions(+), 26 deletions(-) diff --git a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 85dbc499fbde5..04128216be073 100644 --- a/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -951,7 +951,6 @@ primaryExpression | qualifiedName DOT ASTERISK #star | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN #rowConstructor | LEFT_PAREN query RIGHT_PAREN #subqueryExpression - | IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN #identifierClause | functionName LEFT_PAREN (setQuantifier? argument+=functionArgument (COMMA argument+=functionArgument)*)? RIGHT_PAREN (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? @@ -1176,6 +1175,7 @@ qualifiedNameList functionName : IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN + | identFunc=IDENTIFIER_KW // IDENTIFIER itself is also a valid function name. | qualifiedName | FILTER | LEFT diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 83938632e534f..b80ea8fddcfe2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2223,13 +2223,6 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { } } - /** - * Create an expression for the IDENTIFIER() clause. 
- */ - override def visitIdentifierClause(ctx: IdentifierClauseContext): Expression = withOrigin(ctx) { - ExpressionWithUnresolvedIdentifier(expression(ctx.expression), UnresolvedAttribute(_)) - } - /** * Create a (windowed) Function expression. */ @@ -2251,19 +2244,31 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { val filter = Option(ctx.where).map(expression(_)) val ignoreNulls = Option(ctx.nullsOption).map(_.getType == SqlBaseParser.IGNORE).getOrElse(false) - val funcCtx = ctx.functionName - val func = withFuncIdentClause( - funcCtx, - ident => UnresolvedFunction(ident, arguments, isDistinct, filter, ignoreNulls) - ) - // Check if the function is evaluated in a windowed context. - ctx.windowSpec match { - case spec: WindowRefContext => - UnresolvedWindowExpression(func, visitWindowRef(spec)) - case spec: WindowDefContext => - WindowExpression(func, visitWindowDef(spec)) - case _ => func + // Is this an IDENTIFIER clause instead of a function call? + if (ctx.functionName.identFunc != null && + arguments.length == 1 && // One argument + ctx.setQuantifier == null && // No other clause + ctx.where == null && + ctx.nullsOption == null && + ctx.windowSpec == null) { + ExpressionWithUnresolvedIdentifier(arguments.head, UnresolvedAttribute(_)) + } else { + // It's a function call + val funcCtx = ctx.functionName + val func = withFuncIdentClause( + funcCtx, + ident => UnresolvedFunction(ident, arguments, isDistinct, filter, ignoreNulls) + ) + + // Check if the function is evaluated in a windowed context. + ctx.windowSpec match { + case spec: WindowRefContext => + UnresolvedWindowExpression(func, visitWindowRef(spec)) + case spec: WindowDefContext => + WindowExpression(func, visitWindowDef(spec)) + case _ => func + } } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out index 00e2d8ff8ae75..f91d0a26cf8a4 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/identifier-clause.sql.out @@ -187,10 +187,11 @@ Project [coalesce(cast(null as int), 1) AS coalesce(NULL, 1)#x] -- !query -SELECT IDENTIFIER('abs')(-1) +SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1) -- !query analysis -Project [abs(-1) AS abs(-1)#x] -+- OneRowRelation +Project [abs(c1#x) AS abs(c1)#x] ++- SubqueryAlias T + +- LocalRelation [c1#x] -- !query @@ -664,6 +665,27 @@ org.apache.spark.sql.AnalysisException } +-- !query +SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1) +-- !query analysis +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`IDENTIFIER`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 26, + "fragment" : "`IDENTIFIER`('abs')" + } ] +} + + -- !query CREATE TABLE IDENTIFIER(1)(c1 INT) -- !query analysis diff --git a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql index a1bd500455de9..07ae157072938 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/identifier-clause.sql @@ -36,7 +36,7 @@ DROP SCHEMA s; -- Function reference SELECT 
IDENTIFIER('COAL' || 'ESCE')(NULL, 1); -SELECT IDENTIFIER('abs')(-1); +SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1); SELECT * FROM IDENTIFIER('ra' || 'nge')(0, 1); -- Table DDL @@ -107,6 +107,7 @@ SELECT IDENTIFIER('') FROM VALUES(1) AS T(``); VALUES(IDENTIFIER(CAST(NULL AS STRING))); VALUES(IDENTIFIER(1)); VALUES(IDENTIFIER(SUBSTR('HELLO', 1, RAND() + 1))); +SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1); CREATE TABLE IDENTIFIER(1)(c1 INT); CREATE TABLE IDENTIFIER('a.b.c')(c1 INT); diff --git a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out index 8eabb74da97ba..ed87f69fc5e6b 100644 --- a/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/identifier-clause.sql.out @@ -205,9 +205,9 @@ struct -- !query -SELECT IDENTIFIER('abs')(-1) +SELECT IDENTIFIER('abs')(c1) FROM VALUES(-1) AS T(c1) -- !query schema -struct +struct -- !query output 1 @@ -770,6 +770,29 @@ org.apache.spark.sql.AnalysisException } +-- !query +SELECT `IDENTIFIER`('abs')(c1) FROM VALUES(-1) AS T(c1) +-- !query schema +struct<> +-- !query output +org.apache.spark.sql.AnalysisException +{ + "errorClass" : "UNRESOLVED_ROUTINE", + "sqlState" : "42883", + "messageParameters" : { + "routineName" : "`IDENTIFIER`", + "searchPath" : "[`system`.`builtin`, `system`.`session`, `spark_catalog`.`default`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 26, + "fragment" : "`IDENTIFIER`('abs')" + } ] +} + + -- !query CREATE TABLE IDENTIFIER(1)(c1 INT) -- !query schema From b5f3dc9e76082a81357555ace0c489df97e6f81a Mon Sep 17 00:00:00 2001 From: mayurb Date: Fri, 13 Oct 2023 10:17:56 +0800 Subject: [PATCH 058/521] =?UTF-8?q?[SPARK-45498][CORE]=20Followup:=20Ignor?= =?UTF-8?q?e=20task=20completion=20from=20old=20stage=20a=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? With [SPARK-45182](https://issues.apache.org/jira/browse/SPARK-45182), we added a fix for not letting laggard tasks of the older attempts of the indeterminate stage from marking the partition has completed in the map output tracker. When a task is completed, the DAG scheduler also notifies all the task sets of the stage about that partition being completed. Tasksets would not schedule such tasks if they are not already scheduled. This is not correct for the indeterminate stage, since we want to re-run all the tasks on a re-attempt ### Why are the changes needed? Since the partition is not completed by older attempts and the partition from the newer attempt also doesn't get scheduled, the stage will have to be rescheduled to complete that partition. Since the stage is indeterminate, all the partitions will be recomputed ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added check in existing unit test ### Was this patch authored or co-authored using generative AI tooling? No Closes #43326 from mayurdb/indeterminateFix. 
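A simplified sketch of the guard this follow-up adds (hypothetical, simplified types, not the real DAGScheduler or Stage classes): a late task from an older attempt only propagates its partition completion to the other task sets when the stage is determinate.

```scala
case class TaskInfo(stageAttemptId: Int, partitionId: Int)
case class StageInfo(isIndeterminate: Boolean, latestAttemptNumber: Int)

// For an indeterminate stage every partition must be recomputed on retry, so the
// completion notification from a zombie attempt is skipped.
def shouldNotifyPartitionCompletion(task: TaskInfo, stage: StageInfo): Boolean =
  !stage.isIndeterminate && task.stageAttemptId < stage.latestAttemptNumber

// A zombie task from attempt 0 finishing while attempt 1 is running:
shouldNotifyPartitionCompletion(
  TaskInfo(stageAttemptId = 0, partitionId = 1),
  StageInfo(isIndeterminate = false, latestAttemptNumber = 1)) // true
shouldNotifyPartitionCompletion(
  TaskInfo(stageAttemptId = 0, partitionId = 1),
  StageInfo(isIndeterminate = true, latestAttemptNumber = 1))  // false
```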
Authored-by: mayurb Signed-off-by: Wenchen Fan (cherry picked from commit fb3b707bc1c875c14ff7c6e7a3f39b5c4b852c86) Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/scheduler/DAGScheduler.scala | 6 +++--- .../org/apache/spark/scheduler/DAGSchedulerSuite.scala | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index d73bb6339015b..d8adaae19b90d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -1847,9 +1847,9 @@ private[spark] class DAGScheduler( case Success => // An earlier attempt of a stage (which is zombie) may still have running tasks. If these // tasks complete, they still count and we can mark the corresponding partitions as - // finished. Here we notify the task scheduler to skip running tasks for the same partition, - // to save resource. - if (task.stageAttemptId < stage.latestInfo.attemptNumber()) { + // finished if the stage is determinate. Here we notify the task scheduler to skip running + // tasks for the same partition to save resource. + if (!stage.isIndeterminate && task.stageAttemptId < stage.latestInfo.attemptNumber()) { taskScheduler.notifyPartitionCompletion(stageId, task.partitionId) } diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index e351f8b95bbb0..9b7c5d5ace314 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -3169,13 +3169,16 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti makeMapStatus("hostB", 2))) - // The second task of the shuffle map stage 1 from 1st attempt succeeds + // The second task of the shuffle map stage 1 from 1st attempt succeeds runEvent(makeCompletionEvent( taskSets(1).tasks(1), Success, makeMapStatus("hostC", 2))) + // Above task completion should not mark the partition 1 complete from 2nd attempt + assert(!tasksMarkedAsCompleted.contains(taskSets(3).tasks(1))) + // This task completion should get ignored and partition 1 should be missing // for shuffle map stage 1 assert(mapOutputTracker.findMissingPartitions(shuffleId2) == Some(Seq(1))) From 370717702c9e7236aab4ea7135d8085cd4792e99 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 13 Oct 2023 14:05:23 +0800 Subject: [PATCH 059/521] [SPARK-45532][DOCS] Restore codetabs for the Protobuf Data Source Guide ### What changes were proposed in this pull request? This PR restores the [Protobuf Data Source Guide](https://spark.apache.org/docs/latest/sql-data-sources-protobuf.html#python)'s code tabs which https://github.com/apache/spark/pull/40614 removed for markdown syntax fixes In this PR, we introduce a hidden div to hold the code-block marker of markdown, then make both the liquid and markdown happy. ### Why are the changes needed? improve doc readability and consistency. ### Does this PR introduce _any_ user-facing change? yes, doc change ### How was this patch tested? #### Doc build ![image](https://github.com/apache/spark/assets/8326978/8aefeee0-92b2-4048-a3f6-108e4c3f309d) #### markdown editor and view ![image](https://github.com/apache/spark/assets/8326978/283b0820-390a-4540-8713-647c40f956ac) ### Was this patch authored or co-authored using generative AI tooling? 
no Closes #43361 from yaooqinn/SPARK-45532. Authored-by: Kent Yao Signed-off-by: Kent Yao (cherry picked from commit 0257b77528a3a0d0ba08df5363470e4bc5928b06) Signed-off-by: Kent Yao --- docs/sql-data-sources-protobuf.md | 243 ++++++++++++++++++------------ 1 file changed, 150 insertions(+), 93 deletions(-) diff --git a/docs/sql-data-sources-protobuf.md b/docs/sql-data-sources-protobuf.md index f92a8f20b3570..c8ee139e344fe 100644 --- a/docs/sql-data-sources-protobuf.md +++ b/docs/sql-data-sources-protobuf.md @@ -18,7 +18,10 @@ license: | limitations under the License. --- -Since Spark 3.4.0 release, [Spark SQL](https://spark.apache.org/docs/latest/sql-programming-guide.html) provides built-in support for reading and writing protobuf data. +* This will become a table of contents (this text will be scraped). +{:toc} + +Since Spark 3.4.0 release, [Spark SQL](sql-programming-guide.html) provides built-in support for reading and writing protobuf data. ## Deploying The `spark-protobuf` module is external and not included in `spark-submit` or `spark-shell` by default. @@ -46,45 +49,53 @@ Kafka key-value record will be augmented with some metadata, such as the ingesti Spark SQL schema is generated based on the protobuf descriptor file or protobuf class passed to `from_protobuf` and `to_protobuf`. The specified protobuf class or protobuf descriptor file must match the data, otherwise, the behavior is undefined: it may fail or return arbitrary results. -### Python +
+ +
+ +
+This div is only used to make markdown editor/viewer happy and does not display on web + ```python +
+ +{% highlight python %} + from pyspark.sql.protobuf.functions import from_protobuf, to_protobuf -# `from_protobuf` and `to_protobuf` provides two schema choices. Via Protobuf descriptor file, +# from_protobuf and to_protobuf provide two schema choices. Via Protobuf descriptor file, # or via shaded Java class. # give input .proto protobuf schema -# syntax = "proto3" +# syntax = "proto3" # message AppEvent { -# string name = 1; -# int64 id = 2; -# string context = 3; +# string name = 1; +# int64 id = 2; +# string context = 3; # } - -df = spark\ -.readStream\ -.format("kafka")\ -.option("kafka.bootstrap.servers", "host1:port1,host2:port2")\ -.option("subscribe", "topic1")\ -.load() +df = spark + .readStream + .format("kafka")\ + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribe", "topic1") + .load() # 1. Decode the Protobuf data of schema `AppEvent` into a struct; # 2. Filter by column `name`; # 3. Encode the column `event` in Protobuf format. # The Protobuf protoc command can be used to generate a protobuf descriptor file for give .proto file. -output = df\ -.select(from_protobuf("value", "AppEvent", descriptorFilePath).alias("event"))\ -.where('event.name == "alice"')\ -.select(to_protobuf("event", "AppEvent", descriptorFilePath).alias("event")) +output = df + .select(from_protobuf("value", "AppEvent", descriptorFilePath).alias("event")) + .where('event.name == "alice"') + .select(to_protobuf("event", "AppEvent", descriptorFilePath).alias("event")) # Alternatively, you can decode and encode the SQL columns into protobuf format using protobuf # class name. The specified Protobuf class must match the data, otherwise the behavior is undefined: # it may fail or return arbitrary result. To avoid conflicts, the jar file containing the # 'com.google.protobuf.*' classes should be shaded. An example of shading can be found at # https://github.com/rangadi/shaded-protobuf-classes. - -output = df\ -.select(from_protobuf("value", "org.sparkproject.spark_protobuf.protobuf.AppEvent").alias("event"))\ -.where('event.name == "alice"') +output = df + .select(from_protobuf("value", "org.sparkproject.spark_protobuf.protobuf.AppEvent").alias("event")) + .where('event.name == "alice"') output.printSchema() # root @@ -94,52 +105,66 @@ output.printSchema() # | |-- context: string (nullable = true) output = output -.select(to_protobuf("event", "org.sparkproject.spark_protobuf.protobuf.AppEvent").alias("event")) - -query = output\ -.writeStream\ -.format("kafka")\ -.option("kafka.bootstrap.servers", "host1:port1,host2:port2")\ -.option("topic", "topic2")\ -.start() + .select(to_protobuf("event", "org.sparkproject.spark_protobuf.protobuf.AppEvent").alias("event")) + +query = output + .writeStream + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2")\ + .option("topic", "topic2") + .start() + +{% endhighlight %} + +
``` +
+ +
+ +
+ +
+This div is only used to make markdown editor/viewer happy and does not display on web -### Scala ```scala +
+ +{% highlight scala %} import org.apache.spark.sql.protobuf.functions._ -// `from_protobuf` and `to_protobuf` provides two schema choices. Via Protobuf descriptor file, +// `from_protobuf` and `to_protobuf` provides two schema choices. Via the protobuf descriptor file, // or via shaded Java class. // give input .proto protobuf schema -// syntax = "proto3" +// syntax = "proto3" // message AppEvent { -// string name = 1; -// int64 id = 2; -// string context = 3; +// string name = 1; +// int64 id = 2; +// string context = 3; // } val df = spark -.readStream -.format("kafka") -.option("kafka.bootstrap.servers", "host1:port1,host2:port2") -.option("subscribe", "topic1") -.load() + .readStream + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribe", "topic1") + .load() // 1. Decode the Protobuf data of schema `AppEvent` into a struct; // 2. Filter by column `name`; // 3. Encode the column `event` in Protobuf format. // The Protobuf protoc command can be used to generate a protobuf descriptor file for give .proto file. val output = df -.select(from_protobuf($"value", "AppEvent", descriptorFilePath) as $"event") -.where("event.name == \"alice\"") -.select(to_protobuf($"user", "AppEvent", descriptorFilePath) as $"event") + .select(from_protobuf($"value", "AppEvent", descriptorFilePath) as $"event") + .where("event.name == \"alice\"") + .select(to_protobuf($"user", "AppEvent", descriptorFilePath) as $"event") val query = output -.writeStream -.format("kafka") -.option("kafka.bootstrap.servers", "host1:port1,host2:port2") -.option("topic", "topic2") -.start() + .writeStream + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("topic", "topic2") + .start() // Alternatively, you can decode and encode the SQL columns into protobuf format using protobuf // class name. The specified Protobuf class must match the data, otherwise the behavior is undefined: @@ -147,8 +172,8 @@ val query = output // 'com.google.protobuf.*' classes should be shaded. An example of shading can be found at // https://github.com/rangadi/shaded-protobuf-classes. var output = df -.select(from_protobuf($"value", "org.example.protos..AppEvent") as $"event") -.where("event.name == \"alice\"") + .select(from_protobuf($"value", "org.example.protos..AppEvent") as $"event") + .where("event.name == \"alice\"") output.printSchema() // root @@ -160,43 +185,56 @@ output.printSchema() output = output.select(to_protobuf($"event", "org.sparkproject.spark_protobuf.protobuf.AppEvent") as $"event") val query = output -.writeStream -.format("kafka") -.option("kafka.bootstrap.servers", "host1:port1,host2:port2") -.option("topic", "topic2") -.start() + .writeStream + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("topic", "topic2") + .start() + +{% endhighlight %} + +
``` +
+
+ +
+ +
+This div is only used to make markdown editor/viewer happy and does not display on web -### Java ```java +
+ +{% highlight java %} import static org.apache.spark.sql.functions.col; import static org.apache.spark.sql.protobuf.functions.*; -// `from_protobuf` and `to_protobuf` provides two schema choices. Via Protobuf descriptor file, +// `from_protobuf` and `to_protobuf` provides two schema choices. Via the protobuf descriptor file, // or via shaded Java class. // give input .proto protobuf schema -// syntax = "proto3" +// syntax = "proto3" // message AppEvent { -// string name = 1; -// int64 id = 2; -// string context = 3; +// string name = 1; +// int64 id = 2; +// string context = 3; // } Dataset df = spark -.readStream() -.format("kafka") -.option("kafka.bootstrap.servers", "host1:port1,host2:port2") -.option("subscribe", "topic1") -.load(); + .readStream() + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("subscribe", "topic1") + .load(); // 1. Decode the Protobuf data of schema `AppEvent` into a struct; // 2. Filter by column `name`; // 3. Encode the column `event` in Protobuf format. // The Protobuf protoc command can be used to generate a protobuf descriptor file for give .proto file. Dataset output = df -.select(from_protobuf(col("value"), "AppEvent", descriptorFilePath).as("event")) -.where("event.name == \"alice\"") -.select(to_protobuf(col("event"), "AppEvent", descriptorFilePath).as("event")); + .select(from_protobuf(col("value"), "AppEvent", descriptorFilePath).as("event")) + .where("event.name == \"alice\"") + .select(to_protobuf(col("event"), "AppEvent", descriptorFilePath).as("event")); // Alternatively, you can decode and encode the SQL columns into protobuf format using protobuf // class name. The specified Protobuf class must match the data, otherwise the behavior is undefined: @@ -204,10 +242,10 @@ Dataset output = df // 'com.google.protobuf.*' classes should be shaded. An example of shading can be found at // https://github.com/rangadi/shaded-protobuf-classes. Dataset output = df -.select( - from_protobuf(col("value"), - "org.sparkproject.spark_protobuf.protobuf.AppEvent").as("event")) -.where("event.name == \"alice\"") + .select( + from_protobuf(col("value"), + "org.sparkproject.spark_protobuf.protobuf.AppEvent").as("event")) + .where("event.name == \"alice\"") output.printSchema() // root @@ -221,19 +259,28 @@ output = output.select( "org.sparkproject.spark_protobuf.protobuf.AppEvent").as("event")); StreamingQuery query = output -.writeStream() -.format("kafka") -.option("kafka.bootstrap.servers", "host1:port1,host2:port2") -.option("topic", "topic2") -.start(); + .writeStream() + .format("kafka") + .option("kafka.bootstrap.servers", "host1:port1,host2:port2") + .option("topic", "topic2") + .start(); + +{% endhighlight %} + +
``` +
+
+ +
## Supported types for Protobuf -> Spark SQL conversion + Currently Spark supports reading [protobuf scalar types](https://developers.google.com/protocol-buffers/docs/proto3#scalar), [enum types](https://developers.google.com/protocol-buffers/docs/proto3#enum), [nested type](https://developers.google.com/protocol-buffers/docs/proto3#nested), and [maps type](https://developers.google.com/protocol-buffers/docs/proto3#maps) under messages of Protobuf. In addition to the these types, `spark-protobuf` also introduces support for Protobuf `OneOf` fields. which allows you to handle messages that can have multiple possible sets of fields, but only one set can be present at a time. This is useful for situations where the data you are working with is not always in the same format, and you need to be able to handle messages with different sets of fields without encountering errors. - - +
Protobuf typeSpark SQL type
+ @@ -282,16 +329,12 @@ In addition to the these types, `spark-protobuf` also introduces support for Pro - - - -
Protobuf typeSpark SQL type
boolean BooleanTypeOneOf Struct
AnyStructType
It also supports reading the following Protobuf types [Timestamp](https://developers.google.com/protocol-buffers/docs/reference/google.protobuf#timestamp) and [Duration](https://developers.google.com/protocol-buffers/docs/reference/google.protobuf#duration) - - +
Protobuf logical typeProtobuf schemaSpark SQL type
+ @@ -305,10 +348,11 @@ It also supports reading the following Protobuf types [Timestamp](https://develo
Protobuf logical typeProtobuf schemaSpark SQL type
duration MessageType{seconds: Long, nanos: Int}
## Supported types for Spark SQL -> Protobuf conversion + Spark supports the writing of all Spark SQL types into Protobuf. For most types, the mapping from Spark types to Protobuf types is straightforward (e.g. IntegerType gets converted to int); - - +
Spark SQL typeProtobuf type
+ @@ -356,15 +400,23 @@ Spark supports the writing of all Spark SQL types into Protobuf. For most types,
Spark SQL typeProtobuf type
BooleanType boolean
## Handling circular references protobuf fields + One common issue that can arise when working with Protobuf data is the presence of circular references. In Protobuf, a circular reference occurs when a field refers back to itself or to another field that refers back to the original field. This can cause issues when parsing the data, as it can result in infinite loops or other unexpected behavior. -To address this issue, the latest version of spark-protobuf introduces a new feature: the ability to check for circular references through field types. This allows users use the `recursive.fields.max.depth` option to specify the maximum number of levels of recursion to allow when parsing the schema. By default, `spark-protobuf` will not permit recursive fields by setting `recursive.fields.max.depth` to -1. However, you can set this option to 0 to 10 if needed. +To address this issue, the latest version of spark-protobuf introduces a new feature: the ability to check for circular references through field types. This allows users use the `recursive.fields.max.depth` option to specify the maximum number of levels of recursion to allow when parsing the schema. By default, `spark-protobuf` will not permit recursive fields by setting `recursive.fields.max.depth` to -1. However, you can set this option to 0 to 10 if needed. Setting `recursive.fields.max.depth` to 0 drops all recursive fields, setting it to 1 allows it to be recursed once, and setting it to 2 allows it to be recursed twice. A `recursive.fields.max.depth` value greater than 10 is not allowed, as it can lead to performance issues and even stack overflows. SQL Schema for the below protobuf message will vary based on the value of `recursive.fields.max.depth`. -```proto -syntax = "proto3" +
+
+This div is only used to make markdown editor/viewer happy and does not display on web + +```protobuf +
+ +{% highlight protobuf %} +syntax = "proto3" message Person { string name = 1; Person bff = 2 @@ -376,4 +428,9 @@ message Person { 0: struct 1: struct> 2: struct>> ... -``` \ No newline at end of file + +{% endhighlight %} +
+``` +
+
\ No newline at end of file From e8d7497abadd0bccb4bd8e615aadc77fc2038566 Mon Sep 17 00:00:00 2001 From: Bobby Wang Date: Fri, 13 Oct 2023 10:50:18 -0500 Subject: [PATCH 060/521] [SPARK-45495][CORE] Support stage level task resource profile for k8s cluster when dynamic allocation disabled ### What changes were proposed in this pull request? This PR is a follow-up of https://github.com/apache/spark/pull/37268 which supports stage-level task resource profile for standalone cluster when dynamic allocation is disabled. This PR enables stage-level task resource profile for the Kubernetes cluster. ### Why are the changes needed? Users who work on spark ML/DL cases running on Kubernetes would expect stage-level task resource profile feature. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? The current tests of https://github.com/apache/spark/pull/37268 can also cover this PR since both Kubernetes and standalone cluster share the same TaskSchedulerImpl class which implements this feature. Apart from that, modifying the existing test to cover the Kubernetes cluster. Apart from that, I also performed some manual tests which have been updated in the comments. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43323 from wbo4958/k8s-stage-level. Authored-by: Bobby Wang Signed-off-by: Thomas Graves (cherry picked from commit 632eabdb6dfb78c0a5dc84c01806548e1dc6dd0a) Signed-off-by: Thomas Graves --- .../apache/spark/resource/ResourceProfileManager.scala | 7 ++++--- .../spark/resource/ResourceProfileManagerSuite.scala | 9 +++++++-- docs/configuration.md | 2 +- docs/running-on-kubernetes.md | 4 +++- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala b/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala index cd7124a572464..afbacb8013645 100644 --- a/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala +++ b/core/src/main/scala/org/apache/spark/resource/ResourceProfileManager.scala @@ -67,9 +67,10 @@ private[spark] class ResourceProfileManager(sparkConf: SparkConf, */ private[spark] def isSupported(rp: ResourceProfile): Boolean = { if (rp.isInstanceOf[TaskResourceProfile] && !dynamicEnabled) { - if ((notRunningUnitTests || testExceptionThrown) && !(isStandaloneOrLocalCluster || isYarn)) { - throw new SparkException("TaskResourceProfiles are only supported for Standalone and " + - "Yarn cluster for now when dynamic allocation is disabled.") + if ((notRunningUnitTests || testExceptionThrown) && + !(isStandaloneOrLocalCluster || isYarn || isK8s)) { + throw new SparkException("TaskResourceProfiles are only supported for Standalone, " + + "Yarn and Kubernetes cluster for now when dynamic allocation is disabled.") } } else { val isNotDefaultProfile = rp.id != ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID diff --git a/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala b/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala index 77dc7bcb4c56e..7149267583bc5 100644 --- a/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/resource/ResourceProfileManagerSuite.scala @@ -137,8 +137,8 @@ class ResourceProfileManagerSuite extends SparkFunSuite { val error = intercept[SparkException] { rpmanager.isSupported(taskProf) }.getMessage - assert(error === "TaskResourceProfiles are only supported for 
Standalone " + - "and Yarn cluster for now when dynamic allocation is disabled.") + assert(error === "TaskResourceProfiles are only supported for Standalone, " + + "Yarn and Kubernetes cluster for now when dynamic allocation is disabled.") // Local cluster: supports task resource profile. conf.setMaster("local-cluster[1, 1, 1024]") @@ -149,6 +149,11 @@ class ResourceProfileManagerSuite extends SparkFunSuite { conf.setMaster("yarn") rpmanager = new ResourceProfileManager(conf, listenerBus) assert(rpmanager.isSupported(taskProf)) + + // K8s: supports task resource profile. + conf.setMaster("k8s://foo") + rpmanager = new ResourceProfileManager(conf, listenerBus) + assert(rpmanager.isSupported(taskProf)) } test("isSupported task resource profiles with dynamic allocation enabled") { diff --git a/docs/configuration.md b/docs/configuration.md index 74ddd6df0233c..4b0b9b3e3c260 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -3670,7 +3670,7 @@ See your cluster manager specific page for requirements and details on each of - # Stage Level Scheduling Overview The stage level scheduling feature allows users to specify task and executor resource requirements at the stage level. This allows for different stages to run with executors that have different resources. A prime example of this is one ETL stage runs with executors with just CPUs, the next stage is an ML stage that needs GPUs. Stage level scheduling allows for user to request different executors that have GPUs when the ML stage runs rather then having to acquire executors with GPUs at the start of the application and them be idle while the ETL stage is being run. -This is only available for the RDD API in Scala, Java, and Python. It is available on YARN, Kubernetes and Standalone when dynamic allocation is enabled. When dynamic allocation is disabled, it allows users to specify different task resource requirements at stage level, and this is supported on YARN and Standalone cluster right now. See the [YARN](running-on-yarn.html#stage-level-scheduling-overview) page or [Kubernetes](running-on-kubernetes.html#stage-level-scheduling-overview) page or [Standalone](spark-standalone.html#stage-level-scheduling-overview) page for more implementation details. +This is only available for the RDD API in Scala, Java, and Python. It is available on YARN, Kubernetes and Standalone when dynamic allocation is enabled. When dynamic allocation is disabled, it allows users to specify different task resource requirements at stage level, and this is supported on YARN, Kubernetes and Standalone cluster right now. See the [YARN](running-on-yarn.html#stage-level-scheduling-overview) page or [Kubernetes](running-on-kubernetes.html#stage-level-scheduling-overview) page or [Standalone](spark-standalone.html#stage-level-scheduling-overview) page for more implementation details. See the `RDD.withResources` and `ResourceProfileBuilder` API's for using this feature. When dynamic allocation is disabled, tasks with different task resource requirements will share executors with `DEFAULT_RESOURCE_PROFILE`. While when dynamic allocation is enabled, the current implementation acquires new executors for each `ResourceProfile` created and currently has to be an exact match. Spark does not try to fit tasks into an executor that require a different ResourceProfile than the executor was created with. Executors that are not in use will idle timeout with the dynamic allocation logic. 
The default configuration for this feature is to only allow one ResourceProfile per stage. If the user associates more then 1 ResourceProfile to an RDD, Spark will throw an exception by default. See config `spark.scheduler.resource.profileMergeConflicts` to control that behavior. The current merge strategy Spark implements when `spark.scheduler.resource.profileMergeConflicts` is enabled is a simple max of each resource within the conflicting ResourceProfiles. Spark will create a new ResourceProfile with the max of each of the resources. diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 707a76196f3ab..38a745f1afca3 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1936,5 +1936,7 @@ With the above configuration, the job will be scheduled by YuniKorn scheduler in ### Stage Level Scheduling Overview -Stage level scheduling is supported on Kubernetes when dynamic allocation is enabled. This also requires spark.dynamicAllocation.shuffleTracking.enabled to be enabled since Kubernetes doesn't support an external shuffle service at this time. The order in which containers for different profiles is requested from Kubernetes is not guaranteed. Note that since dynamic allocation on Kubernetes requires the shuffle tracking feature, this means that executors from previous stages that used a different ResourceProfile may not idle timeout due to having shuffle data on them. This could result in using more cluster resources and in the worst case if there are no remaining resources on the Kubernetes cluster then Spark could potentially hang. You may consider looking at config spark.dynamicAllocation.shuffleTracking.timeout to set a timeout, but that could result in data having to be recomputed if the shuffle data is really needed. +Stage level scheduling is supported on Kubernetes: +- When dynamic allocation is disabled: It allows users to specify different task resource requirements at the stage level and will use the same executors requested at startup. +- When dynamic allocation is enabled: It allows users to specify task and executor resource requirements at the stage level and will request the extra executors. This also requires spark.dynamicAllocation.shuffleTracking.enabled to be enabled since Kubernetes doesn't support an external shuffle service at this time. The order in which containers for different profiles is requested from Kubernetes is not guaranteed. Note that since dynamic allocation on Kubernetes requires the shuffle tracking feature, this means that executors from previous stages that used a different ResourceProfile may not idle timeout due to having shuffle data on them. This could result in using more cluster resources and in the worst case if there are no remaining resources on the Kubernetes cluster then Spark could potentially hang. You may consider looking at config spark.dynamicAllocation.shuffleTracking.timeout to set a timeout, but that could result in data having to be recomputed if the shuffle data is really needed. Note, there is a difference in the way pod template resources are handled between the base default profile and custom ResourceProfiles. Any resources specified in the pod template file will only be used with the base default profile. If you create custom ResourceProfiles be sure to include all necessary resources there since the resources from the template file will not be propagated to custom ResourceProfiles. 
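A brief usage sketch of the stage-level task resource profile feature this commit extends to Kubernetes ("gpu" and the amounts are examples; `sc` is assumed to be an active SparkContext and the cluster must be configured to discover the named resource).

```scala
import org.apache.spark.resource.{ResourceProfileBuilder, TaskResourceRequests}

// Only task requirements are specified, which is the case this feature targets when
// dynamic allocation is disabled.
val taskReqs = new TaskResourceRequests().cpus(1).resource("gpu", 1)
val profile = new ResourceProfileBuilder().require(taskReqs).build()

// The stage that computes this RDD runs its tasks with the requested per-task resources
// on the executors that were requested at startup.
val doubled = sc.parallelize(1 to 100, 4).map(_ * 2).withResources(profile)
doubled.collect()
```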
From 146fba1a22e3f1555f3e4494522810030f9a7854 Mon Sep 17 00:00:00 2001 From: Josh Rosen Date: Fri, 13 Oct 2023 13:29:45 +0800 Subject: [PATCH 061/521] [SPARK-45508][CORE] Add "--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED" so Platform can access Cleaner on Java 9+ This PR adds `--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED` to our JVM flags so that we can access `jdk.internal.ref.Cleaner` in JDK 9+. This allows Spark to allocate direct memory while ignoring the JVM's MaxDirectMemorySize limit. Spark uses JDK internal APIs to directly construct DirectByteBuffers while bypassing that limit, but there is a fallback path at https://github.com/apache/spark/blob/v3.5.0/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java#L213 that is used if we cannot reflectively access the `Cleaner` API. No. Added a unit test in `PlatformUtilSuite`. No. Closes #43344 from JoshRosen/SPARK-45508. Authored-by: Josh Rosen Signed-off-by: yangjie01 (cherry picked from commit 96bac6c033b5bb37101ebcd8436ab9a84db8e092) Signed-off-by: Dongjoon Hyun --- .../src/main/java/org/apache/spark/unsafe/Platform.java | 7 ++++++- .../java/org/apache/spark/unsafe/PlatformUtilSuite.java | 7 +++++++ .../java/org/apache/spark/launcher/JavaModuleOptions.java | 1 + pom.xml | 1 + project/SparkBuild.scala | 1 + 5 files changed, 16 insertions(+), 1 deletion(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java index a91ea2ee6b5a8..e02346c477375 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java @@ -96,7 +96,7 @@ public final class Platform { Method createMethod = cleanerClass.getMethod("create", Object.class, Runnable.class); // Accessing jdk.internal.ref.Cleaner should actually fail by default in JDK 9+, // unfortunately, unless the user has allowed access with something like - // --add-opens java.base/java.lang=ALL-UNNAMED If not, we can't really use the Cleaner + // --add-opens java.base/jdk.internal.ref=ALL-UNNAMED If not, we can't use the Cleaner // hack below. It doesn't break, just means the user might run into the default JVM limit // on off-heap memory and increase it or set the flag above. This tests whether it's // available: @@ -118,6 +118,11 @@ public final class Platform { } } + // Visible for testing + public static boolean cleanerCreateMethodIsDefined() { + return CLEANER_CREATE_METHOD != null; + } + /** * @return true when running JVM is having sun's Unsafe package available in it and underlying * system having unaligned-access capability. diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java index c59878fea9941..c99f2d85f4e54 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/PlatformUtilSuite.java @@ -157,4 +157,11 @@ public void heapMemoryReuse() { Assert.assertEquals(1024 * 1024 + 7, onheap4.size()); Assert.assertEquals(obj3, onheap4.getBaseObject()); } + + @Test + public void cleanerCreateMethodIsDefined() { + // Regression test for SPARK-45508: we don't expect the "no cleaner" fallback + // path to be hit in normal usage. 
+ Assert.assertTrue(Platform.cleanerCreateMethodIsDefined()); + } } diff --git a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java index 013dde2766f49..f6a9607e7c5d3 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java +++ b/launcher/src/main/java/org/apache/spark/launcher/JavaModuleOptions.java @@ -36,6 +36,7 @@ public class JavaModuleOptions { "--add-opens=java.base/java.util=ALL-UNNAMED", "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED", "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED", + "--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED", "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens=java.base/sun.nio.cs=ALL-UNNAMED", "--add-opens=java.base/sun.security.action=ALL-UNNAMED", diff --git a/pom.xml b/pom.xml index 8fc4b89a78cc2..be8400c33bf2b 100644 --- a/pom.xml +++ b/pom.xml @@ -308,6 +308,7 @@ --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED + --add-opens=java.base/jdk.internal.ref=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 563d53577548e..718f2bb28cec4 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1619,6 +1619,7 @@ object TestSettings { "--add-opens=java.base/java.util=ALL-UNNAMED", "--add-opens=java.base/java.util.concurrent=ALL-UNNAMED", "--add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED", + "--add-opens=java.base/jdk.internal.ref=ALL-UNNAMED", "--add-opens=java.base/sun.nio.ch=ALL-UNNAMED", "--add-opens=java.base/sun.nio.cs=ALL-UNNAMED", "--add-opens=java.base/sun.security.action=ALL-UNNAMED", From daa3281e6a68845943fcf61ba7ad1d2d3c8be28f Mon Sep 17 00:00:00 2001 From: xieshuaihu Date: Mon, 16 Oct 2023 17:01:18 +0900 Subject: [PATCH 062/521] [SPARK-45538][PYTHON][CONNECT] pyspark connect overwrite_partitions bug Fix a bug in pyspark connect. DataFrameWriterV2.overwritePartitions set mode as overwrite_partitions [pyspark/sql/connect/readwriter.py, line 825], but WirteOperationV2 take it as overwrite_partition [pyspark/sql/connect/plan.py, line 1660] make dataframe.writeTo(table).overwritePartitions() work No No test. This bug is very obvious. No Closes #43367 from xieshuaihu/python_connect_overwrite. 
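(Editorial note, not part of the patch.) The user-facing path affected by this fix is the v2 writer in a Spark Connect session. A hedged illustration; the Connect URL and the pre-existing partitioned v2 table name `demo.db.events` are assumptions:

```
from pyspark.sql import SparkSession

# Spark Connect client session (15002 is the default Connect server port).
spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()

df = spark.createDataFrame([(1, "2023-10-16"), (2, "2023-10-16")], ["id", "day"])

# DataFrameWriterV2.overwritePartitions() sets the write mode string
# "overwrite_partitions"; before this fix the Connect plan serializer only
# matched "overwrite_partition", so the mode was never translated to
# MODE_OVERWRITE_PARTITIONS in the protobuf command.
df.writeTo("demo.db.events").overwritePartitions()
```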
Authored-by: xieshuaihu Signed-off-by: Hyukjin Kwon (cherry picked from commit 9bdad31039134b492caeeba430120d5978a085ee) Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/connect/plan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py index b7ea1f9499354..9af5823dd8b84 100644 --- a/python/pyspark/sql/connect/plan.py +++ b/python/pyspark/sql/connect/plan.py @@ -1655,7 +1655,7 @@ def command(self, session: "SparkConnectClient") -> proto.Command: plan.write_operation_v2.mode = proto.WriteOperationV2.Mode.MODE_CREATE elif wm == "overwrite": plan.write_operation_v2.mode = proto.WriteOperationV2.Mode.MODE_OVERWRITE - elif wm == "overwrite_partition": + elif wm == "overwrite_partitions": plan.write_operation_v2.mode = proto.WriteOperationV2.Mode.MODE_OVERWRITE_PARTITIONS elif wm == "append": plan.write_operation_v2.mode = proto.WriteOperationV2.Mode.MODE_APPEND From 0dc1962374dceea29a0fa7802881dfeff335d3c9 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Mon, 16 Oct 2023 17:10:26 +0800 Subject: [PATCH 063/521] [SPARK-44619][INFRA][3.5] Free up disk space for container jobs ### What changes were proposed in this pull request? Free up disk space for container jobs ### Why are the changes needed? increase the available disk space before this PR ![image](https://github.com/apache/spark/assets/7322292/64230324-607b-4c1d-ac2d-84b9bcaab12a) after this PR ![image](https://github.com/apache/spark/assets/7322292/aafed2d6-5d26-4f7f-b020-1efe4f551a8f) ### Does this PR introduce _any_ user-facing change? No, infra-only ### How was this patch tested? updated CI ### Was this patch authored or co-authored using generative AI tooling? No Closes #43381 from LuciferYang/SPARK-44619-35. Authored-by: Ruifeng Zheng Signed-off-by: yangjie01 --- .github/workflows/build_and_test.yml | 6 +++++ dev/free_disk_space_container | 33 ++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100755 dev/free_disk_space_container diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 1fcca7e4c3984..674e59508510c 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -407,6 +407,8 @@ jobs: key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | pyspark-coursier- + - name: Free up disk space + run: ./dev/free_disk_space_container - name: Install Java ${{ matrix.java }} uses: actions/setup-java@v3 with: @@ -504,6 +506,8 @@ jobs: key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }} restore-keys: | sparkr-coursier- + - name: Free up disk space + run: ./dev/free_disk_space_container - name: Install Java ${{ inputs.java }} uses: actions/setup-java@v3 with: @@ -612,6 +616,8 @@ jobs: key: docs-maven-${{ hashFiles('**/pom.xml') }} restore-keys: | docs-maven- + - name: Free up disk space + run: ./dev/free_disk_space_container - name: Install Java 8 uses: actions/setup-java@v3 with: diff --git a/dev/free_disk_space_container b/dev/free_disk_space_container new file mode 100755 index 0000000000000..cc3b74643e4fa --- /dev/null +++ b/dev/free_disk_space_container @@ -0,0 +1,33 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +echo "==================================" +echo "Free up disk space on CI system" +echo "==================================" + +echo "Listing 100 largest packages" +dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n | tail -n 100 +df -h + +echo "Removing large packages" +rm -rf /__t/CodeQL +rm -rf /__t/go +rm -rf /__t/node + +df -h From b2103731bcfe7e0bee3b1302c773e46f80badcc9 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Tue, 17 Oct 2023 09:50:39 +0800 Subject: [PATCH 064/521] [SPARK-45484][SQL][3.5] Deprecated the incorrect parquet compression codec lz4raw ### What changes were proposed in this pull request? According to the discussion at https://github.com/apache/spark/pull/43310#issuecomment-1757139681, this PR want deprecates the incorrect parquet compression codec `lz4raw` at Spark 3.5.1 and adds a warning log. The warning log prompts users that `lz4raw` will be removed it at Apache Spark 4.0.0. ### Why are the changes needed? Deprecated the incorrect parquet compression codec `lz4raw`. ### Does this PR introduce _any_ user-facing change? 'Yes'. Users will see the waring log below. `Parquet compression codec 'lz4raw' is deprecated, please use 'lz4_raw'` ### How was this patch tested? Exists test cases and new test cases. ### Was this patch authored or co-authored using generative AI tooling? 'No'. Closes #43330 from beliefer/SPARK-45484_3.5. Authored-by: Jiaan Geng Signed-off-by: Jiaan Geng --- .../apache/spark/sql/internal/SQLConf.scala | 14 +++++++++-- .../datasources/parquet/ParquetOptions.scala | 8 +++++- .../datasources/FileSourceCodecSuite.scala | 2 +- ...rquetCompressionCodecPrecedenceSuite.scala | 25 ++++++++++++++++--- 4 files changed, 41 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 73d3756ef6b93..427d04801902f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -995,12 +995,22 @@ object SQLConf { "`parquet.compression` is specified in the table-specific options/properties, the " + "precedence would be `compression`, `parquet.compression`, " + "`spark.sql.parquet.compression.codec`. 
Acceptable values include: none, uncompressed, " + - "snappy, gzip, lzo, brotli, lz4, lz4raw, zstd.") + "snappy, gzip, lzo, brotli, lz4, lz4raw, lz4_raw, zstd.") .version("1.1.1") .stringConf .transform(_.toLowerCase(Locale.ROOT)) .checkValues( - Set("none", "uncompressed", "snappy", "gzip", "lzo", "brotli", "lz4", "lz4raw", "zstd")) + Set( + "none", + "uncompressed", + "snappy", + "gzip", + "lzo", + "brotli", + "lz4", + "lz4raw", + "lz4_raw", + "zstd")) .createWithDefault("snappy") val PARQUET_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.parquet.filterPushdown") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala index 023d2460959cd..95869b6fbb9d5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetOptions.scala @@ -22,6 +22,7 @@ import java.util.Locale import org.apache.parquet.hadoop.ParquetOutputFormat import org.apache.parquet.hadoop.metadata.CompressionCodecName +import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.{DataSourceOptions, FileSourceOptions} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.internal.SQLConf @@ -32,7 +33,7 @@ import org.apache.spark.sql.internal.SQLConf class ParquetOptions( @transient private val parameters: CaseInsensitiveMap[String], @transient private val sqlConf: SQLConf) - extends FileSourceOptions(parameters) { + extends FileSourceOptions(parameters) with Logging { import ParquetOptions._ @@ -59,6 +60,9 @@ class ParquetOptions( throw new IllegalArgumentException(s"Codec [$codecName] " + s"is not available. Available codecs are ${availableCodecs.mkString(", ")}.") } + if (codecName == "lz4raw") { + log.warn("Parquet compression codec 'lz4raw' is deprecated, please use 'lz4_raw'") + } shortParquetCompressionCodecNames(codecName).name() } @@ -96,7 +100,9 @@ object ParquetOptions extends DataSourceOptions { "lzo" -> CompressionCodecName.LZO, "brotli" -> CompressionCodecName.BROTLI, "lz4" -> CompressionCodecName.LZ4, + // Deprecated, to be removed at Spark 4.0.0, please use 'lz4_raw' instead. "lz4raw" -> CompressionCodecName.LZ4_RAW, + "lz4_raw" -> CompressionCodecName.LZ4_RAW, "zstd" -> CompressionCodecName.ZSTD) def getParquetCompressionCodecName(name: String): String = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala index 09a348cd29451..9f3d6ff48d477 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileSourceCodecSuite.scala @@ -59,7 +59,7 @@ class ParquetCodecSuite extends FileSourceCodecSuite { // Exclude "brotli" because the com.github.rdblue:brotli-codec dependency is not available // on Maven Central. 
override protected def availableCodecs: Seq[String] = { - Seq("none", "uncompressed", "snappy", "gzip", "zstd", "lz4", "lz4raw") + Seq("none", "uncompressed", "snappy", "gzip", "zstd", "lz4", "lz4raw", "lz4_raw") } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala index ac0aad16f1eba..27e2816ce9d94 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetCompressionCodecPrecedenceSuite.scala @@ -29,9 +29,23 @@ import org.apache.spark.sql.test.SharedSparkSession class ParquetCompressionCodecPrecedenceSuite extends ParquetTest with SharedSparkSession { test("Test `spark.sql.parquet.compression.codec` config") { - Seq("NONE", "UNCOMPRESSED", "SNAPPY", "GZIP", "LZO", "LZ4", "BROTLI", "ZSTD").foreach { c => + Seq( + "NONE", + "UNCOMPRESSED", + "SNAPPY", + "GZIP", + "LZO", + "LZ4", + "BROTLI", + "ZSTD", + "LZ4RAW", + "LZ4_RAW").foreach { c => withSQLConf(SQLConf.PARQUET_COMPRESSION.key -> c) { - val expected = if (c == "NONE") "UNCOMPRESSED" else c + val expected = c match { + case "NONE" => "UNCOMPRESSED" + case "LZ4RAW" => "LZ4_RAW" + case other => other + } val option = new ParquetOptions(Map.empty[String, String], spark.sessionState.conf) assert(option.compressionCodecClassName == expected) } @@ -97,7 +111,10 @@ class ParquetCompressionCodecPrecedenceSuite extends ParquetTest with SharedSpar createTableWithCompression(tempTableName, isPartitioned, compressionCodec, tmpDir) val partitionPath = if (isPartitioned) "p=2" else "" val path = s"${tmpDir.getPath.stripSuffix("/")}/$tempTableName/$partitionPath" - val realCompressionCodecs = getTableCompressionCodec(path) + val realCompressionCodecs = getTableCompressionCodec(path).map { + case "LZ4_RAW" if compressionCodec == "LZ4RAW" => "LZ4RAW" + case other => other + } assert(realCompressionCodecs.forall(_ == compressionCodec)) } } @@ -105,7 +122,7 @@ class ParquetCompressionCodecPrecedenceSuite extends ParquetTest with SharedSpar test("Create parquet table with compression") { Seq(true, false).foreach { isPartitioned => - val codecs = Seq("UNCOMPRESSED", "SNAPPY", "GZIP", "ZSTD", "LZ4") + val codecs = Seq("UNCOMPRESSED", "SNAPPY", "GZIP", "ZSTD", "LZ4", "LZ4RAW", "LZ4_RAW") codecs.foreach { compressionCodec => checkCompressionCodec(compressionCodec, isPartitioned) } From 22a83caa4896a8d03ec7e76b3e7a3bd08930adcb Mon Sep 17 00:00:00 2001 From: Bo Xiong Date: Tue, 17 Oct 2023 18:05:23 +0800 Subject: [PATCH 065/521] [SPARK-45283][CORE][TESTS][3.5] Make StatusTrackerSuite less fragile ### Why are the changes needed? It's discovered from [Github Actions](https://github.com/xiongbo-sjtu/spark/actions/runs/6270601155/job/17028788767) that StatusTrackerSuite can run into random failures, as shown by the following error message. The proposed fix is to update the unit test to remove the nondeterministic behavior. The fix has been made to the master branch in https://github.com/apache/spark/pull/43194. This PR is meant to patch branch-3.5 only. 
``` [info] StatusTrackerSuite: [info] - basic status API usage (99 milliseconds) [info] - getJobIdsForGroup() (56 milliseconds) [info] - getJobIdsForGroup() with takeAsync() (48 milliseconds) [info] - getJobIdsForGroup() with takeAsync() across multiple partitions (58 milliseconds) [info] - getJobIdsForTag() *** FAILED *** (10 seconds, 77 milliseconds) [info] The code passed to eventually never returned normally. Attempted 651 times over 10.005059944000001 seconds. Last failure message: Set(3, 2, 1) was not equal to Set(1, 2). (StatusTrackerSuite.scala:148) ``` Full trace can be found [here](https://issues.apache.org/jira/browse/SPARK-45283). ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? ``` build/mvn package -DskipTests -pl core build/mvn -Dtest=none -DwildcardSuites=org.apache.spark.StatusTrackerSuite test ``` ### Was this patch authored or co-authored using generative AI tooling? No Closes #43388 from xiongbo-sjtu/branch-3.5. Authored-by: Bo Xiong Signed-off-by: yangjie01 --- .../test/scala/org/apache/spark/StatusTrackerSuite.scala | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala b/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala index 0817abbc6a328..9019ea484b3f3 100644 --- a/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/StatusTrackerSuite.scala @@ -140,16 +140,19 @@ class StatusTrackerSuite extends SparkFunSuite with Matchers with LocalSparkCont } sc.removeJobTag("tag1") + // takeAsync() across multiple partitions val thirdJobFuture = sc.parallelize(1 to 1000, 2).takeAsync(999) - val thirdJobId = eventually(timeout(10.seconds)) { - thirdJobFuture.jobIds.head + val thirdJobIds = eventually(timeout(10.seconds)) { + // Wait for the two jobs triggered by takeAsync + thirdJobFuture.jobIds.size should be(2) + thirdJobFuture.jobIds } eventually(timeout(10.seconds)) { sc.statusTracker.getJobIdsForTag("tag1").toSet should be ( Set(firstJobId, secondJobId)) sc.statusTracker.getJobIdsForTag("tag2").toSet should be ( - Set(secondJobId, thirdJobId)) + Set(secondJobId) ++ thirdJobIds) } } } From 6a5747d66e53ed0d934cdd9ca5c9bd9fde6868e6 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 17 Oct 2023 22:19:18 +0800 Subject: [PATCH 066/521] [SPARK-45568][TESTS] Fix flaky WholeStageCodegenSparkSubmitSuite ### What changes were proposed in this pull request? WholeStageCodegenSparkSubmitSuite is [flaky](https://github.com/apache/spark/actions/runs/6479534195/job/17593342589) because SHUFFLE_PARTITIONS(200) creates 200 reducers for one total core and improper stop progress causes executor launcher reties. The heavy load and reties might result in timeout test failures. ### Why are the changes needed? CI robustness ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing WholeStageCodegenSparkSubmitSuite ### Was this patch authored or co-authored using generative AI tooling? no Closes #43394 from yaooqinn/SPARK-45568. 
Authored-by: Kent Yao Signed-off-by: Kent Yao (cherry picked from commit f00ec39542a5f9ac75d8c24f0f04a7be703c8d7c) Signed-off-by: Kent Yao --- .../WholeStageCodegenSparkSubmitSuite.scala | 57 ++++++++++--------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala index e253de76221ad..69145d890fc19 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSparkSubmitSuite.scala @@ -26,6 +26,7 @@ import org.apache.spark.deploy.SparkSubmitTestUtils import org.apache.spark.internal.Logging import org.apache.spark.sql.{QueryTest, Row, SparkSession} import org.apache.spark.sql.functions.{array, col, count, lit} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.IntegerType import org.apache.spark.tags.ExtendedSQLTest import org.apache.spark.unsafe.Platform @@ -70,39 +71,41 @@ class WholeStageCodegenSparkSubmitSuite extends SparkSubmitTestUtils object WholeStageCodegenSparkSubmitSuite extends Assertions with Logging { - var spark: SparkSession = _ - def main(args: Array[String]): Unit = { TestUtils.configTestLog4j2("INFO") - spark = SparkSession.builder().getOrCreate() + val spark = SparkSession.builder() + .config(SQLConf.SHUFFLE_PARTITIONS.key, "2") + .getOrCreate() + + try { + // Make sure the test is run where the driver and the executors uses different object layouts + val driverArrayHeaderSize = Platform.BYTE_ARRAY_OFFSET + val executorArrayHeaderSize = + spark.sparkContext.range(0, 1).map(_ => Platform.BYTE_ARRAY_OFFSET).collect().head + assert(driverArrayHeaderSize > executorArrayHeaderSize) - // Make sure the test is run where the driver and the executors uses different object layouts - val driverArrayHeaderSize = Platform.BYTE_ARRAY_OFFSET - val executorArrayHeaderSize = - spark.sparkContext.range(0, 1).map(_ => Platform.BYTE_ARRAY_OFFSET).collect.head.toInt - assert(driverArrayHeaderSize > executorArrayHeaderSize) + val df = spark.range(71773).select((col("id") % lit(10)).cast(IntegerType) as "v") + .groupBy(array(col("v"))).agg(count(col("*"))) + val plan = df.queryExecution.executedPlan + assert(plan.exists(_.isInstanceOf[WholeStageCodegenExec])) - val df = spark.range(71773).select((col("id") % lit(10)).cast(IntegerType) as "v") - .groupBy(array(col("v"))).agg(count(col("*"))) - val plan = df.queryExecution.executedPlan - assert(plan.exists(_.isInstanceOf[WholeStageCodegenExec])) + val expectedAnswer = + Row(Array(0), 7178) :: + Row(Array(1), 7178) :: + Row(Array(2), 7178) :: + Row(Array(3), 7177) :: + Row(Array(4), 7177) :: + Row(Array(5), 7177) :: + Row(Array(6), 7177) :: + Row(Array(7), 7177) :: + Row(Array(8), 7177) :: + Row(Array(9), 7177) :: Nil - val expectedAnswer = - Row(Array(0), 7178) :: - Row(Array(1), 7178) :: - Row(Array(2), 7178) :: - Row(Array(3), 7177) :: - Row(Array(4), 7177) :: - Row(Array(5), 7177) :: - Row(Array(6), 7177) :: - Row(Array(7), 7177) :: - Row(Array(8), 7177) :: - Row(Array(9), 7177) :: Nil - val result = df.collect - QueryTest.sameRows(result.toSeq, expectedAnswer) match { - case Some(errMsg) => fail(errMsg) - case _ => + QueryTest.checkAnswer(df, expectedAnswer) + } finally { + spark.stop() } + } } From 18599ea750f50e07a910487fb3a871ed69fb9cab Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Tue, 
17 Oct 2023 16:10:56 -0700 Subject: [PATCH 067/521] [MINOR][SQL] Remove signature from Hive thriftserver exception ### What changes were proposed in this pull request? Don't return expected signature to caller in Hive thriftserver exception ### Why are the changes needed? Please see private discussion ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #43402 from srowen/HiveCookieSigner. Authored-by: Sean Owen Signed-off-by: Dongjoon Hyun (cherry picked from commit cf59b1f51c16301f689b4e0f17ba4dbd140e1b19) Signed-off-by: Dongjoon Hyun --- .../src/main/java/org/apache/hive/service/CookieSigner.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java index 782e47a6cd902..4b8d2cb1536cd 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/CookieSigner.java @@ -81,8 +81,7 @@ public String verifyAndExtract(String signedStr) { LOG.debug("Signature generated for " + rawValue + " inside verify is " + currentSignature); } if (!MessageDigest.isEqual(originalSignature.getBytes(), currentSignature.getBytes())) { - throw new IllegalArgumentException("Invalid sign, original = " + originalSignature + - " current = " + currentSignature); + throw new IllegalArgumentException("Invalid sign"); } return rawValue; } From ed2a4cc6033ac35faa7b19eb236a4c953543d519 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Wed, 18 Oct 2023 11:43:59 +0900 Subject: [PATCH 068/521] [MINOR][DOCS] Update the docs for spark.sql.optimizer.canChangeCachedPlanOutputPartitioning configuration ### What changes were proposed in this pull request? This PR fixes the documentation for `spark.sql.optimizer.canChangeCachedPlanOutputPartitioning` configuration by saying this is enabled by default. This is a followup of https://github.com/apache/spark/pull/40390 (but did not use a JIRA due to fixed versions properties in the JIRA). ### Why are the changes needed? To mention that this is enabled, to the end users. ### Does this PR introduce _any_ user-facing change? No, it's an internal conf, not documented. ### How was this patch tested? CI in this PR. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43411 from HyukjinKwon/fix-docs. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 0cb4a84f6ab0c1bd101e6bc72be82987bbc02e9b) Signed-off-by: Hyukjin Kwon --- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 427d04801902f..4ea0cd5bcc126 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -1529,7 +1529,7 @@ object SQLConf { .doc("Whether to forcibly enable some optimization rules that can change the output " + "partitioning of a cached query when executing it for caching. If it is set to true, " + "queries may need an extra shuffle to read the cached data. This configuration is " + - "disabled by default. 
Currently, the optimization rules enabled by this configuration " + + "enabled by default. The optimization rules enabled by this configuration " + s"are ${ADAPTIVE_EXECUTION_ENABLED.key} and ${AUTO_BUCKETED_SCAN_ENABLED.key}.") .version("3.2.0") .booleanConf From 2f66851972c2fc66b053a8c78bc2814b7bb4257f Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 19 Oct 2023 09:43:13 +0800 Subject: [PATCH 069/521] [SPARK-45585][TEST] Fix time format and redirection issues in SparkSubmit tests ### What changes were proposed in this pull request? This PR fixes: - The deviation from `new Timestamp(new Date().getTime)` and log4j2 date format pattern from sub spark-submit progress ``` 2023-10-17 03:58:48.275 - stderr> 23/10/17 18:58:48 INFO StandaloneSchedulerBackend: Connected to Spark cluster with app ID app-20231017185848-0000 2023-10-17 03:58:48.278 - stderr> 23/10/17 18:58:48 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 57637. ``` - The duplication of `new Timestamp(new Date().getTime)` when using logInfo instead of println ``` 23/10/17 19:02:34.392 Thread-5 INFO SparkShellSuite: 2023-10-17 04:02:34.392 - stderr> 23/10/17 19:02:34 WARN Utils: Your hostname, hulk.local resolves to a loopback address: 127.0.0.1; using 10.221.103.23 instead (on interface en0) 23/10/17 19:02:34.393 Thread-5 INFO SparkShellSuite: 2023-10-17 04:02:34.393 - stderr> 23/10/17 19:02:34 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address ``` - Correctly redirects sub spark-submit progress logs to unit-tests.log ### Why are the changes needed? test fixes ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? - WholeStageCodegenSparkSubmitSuite - before ``` 18:58:53.882 shutdown-hook-0 INFO ShutdownHookManager: Shutdown hook called 18:58:53.882 shutdown-hook-0 INFO ShutdownHookManager: Deleting directory /Users/hzyaoqin/spark/target/tmp/spark-ecd53d47-d109-4ddc-80dd-2d829f34371e 11:58:18.892 pool-1-thread-1 WARN Utils: Your hostname, hulk.local resolves to a loopback address: 127.0.0.1; using 10.221.103.23 instead (on interface en0) 11:58:18.893 pool-1-thread-1 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address 11:58:18.932 pool-1-thread-1-ScalaTest-running-WholeStageCodegenSparkSubmitSuite INFO WholeStageCodegenSparkSubmitSuite: ``` - WholeStageCodegenSparkSubmitSuite - after ``` ===== TEST OUTPUT FOR o.a.s.sql.execution.WholeStageCodegenSparkSubmitSuite: 'Generated code on driver should not embed platform-specific constant' ===== 11:58:19.882 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:19 WARN Utils: Your hostname, hulk.local resolves to a loopback address: 127.0.0.1; using 10.221.103.23 instead (on interface en0) 11:58:19.883 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:19 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address 11:58:20.195 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO SparkContext: Running Spark version 4.0.0-SNAPSHOT 11:58:20.195 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO SparkContext: OS info Mac OS X, 13.4, aarch64 11:58:20.195 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO SparkContext: Java version 17.0.8 11:58:20.227 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... 
using builtin-java classes where applicable 11:58:20.253 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO ResourceUtils: ============================================================== 11:58:20.253 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO ResourceUtils: No custom resources configured for spark.driver. 11:58:20.253 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO ResourceUtils: ============================================================== 11:58:20.254 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO SparkContext: Submitted application: org.apache.spark.sql.execution.WholeStageCodegenSparkSubmitSuite 11:58:20.266 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO ResourceProfile: Default ResourceProfile created, executor resources: Map(memory -> name: memory, amount: 1024, script: , vendor: , offHeap -> name: offHeap, amount: 0, script: , vendor: ), task resources: Map(cpus -> name: cpus, amount: 1.0) 11:58:20.268 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO ResourceProfile: Limiting resource is cpu 11:58:20.268 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO ResourceProfileManager: Added ResourceProfile id: 0 11:58:20.302 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO SecurityManager: Changing view acls to: hzyaoqin 11:58:20.302 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO SecurityManager: Changing modify acls to: hzyaoqin 11:58:20.303 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO SecurityManager: Changing view acls groups to: 11:58:20.303 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO SecurityManager: Changing modify acls groups to: 11:58:20.305 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: hzyaoqin; groups with view permissions: EMPTY; users with modify permissions: hzyaoqin; groups with modify permissions: EMPTY; RPC SSL disabled 11:58:20.448 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO Utils: Successfully started service 'sparkDriver' on port 52173. 11:58:20.465 Thread-6 INFO WholeStageCodegenSparkSubmitSuite: stderr> 23/10/18 11:58:20 INFO SparkEnv: Registering MapOutputTracker ``` ### Was this patch authored or co-authored using generative AI tooling? no Closes #43421 from yaooqinn/SPARK-45585. 
Authored-by: Kent Yao Signed-off-by: Kent Yao (cherry picked from commit a14f90941caf06e2d77789a3952dd588e6900b90) Signed-off-by: Kent Yao --- .../spark/deploy/SparkSubmitTestUtils.scala | 15 ++------------- .../org/apache/spark/repl/SparkShellSuite.scala | 11 ++++------- .../spark/sql/hive/thriftserver/CliSuite.scala | 9 ++------- 3 files changed, 8 insertions(+), 27 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitTestUtils.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitTestUtils.scala index 2ab2e17df03a8..932e972374cae 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitTestUtils.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitTestUtils.scala @@ -18,8 +18,6 @@ package org.apache.spark.deploy import java.io.File -import java.sql.Timestamp -import java.util.Date import scala.collection.mutable.ArrayBuffer @@ -69,17 +67,8 @@ trait SparkSubmitTestUtils extends SparkFunSuite with TimeLimits { env.put("SPARK_HOME", sparkHome) def captureOutput(source: String)(line: String): Unit = { - // This test suite has some weird behaviors when executed on Jenkins: - // - // 1. Sometimes it gets extremely slow out of unknown reason on Jenkins. Here we add a - // timestamp to provide more diagnosis information. - // 2. Log lines are not correctly redirected to unit-tests.log as expected, so here we print - // them out for debugging purposes. - val logLine = s"${new Timestamp(new Date().getTime)} - $source> $line" - // scalastyle:off println - println(logLine) - // scalastyle:on println - history += logLine + logInfo(s"$source> $line") + history += line } val process = builder.start() diff --git a/repl/src/test/scala/org/apache/spark/repl/SparkShellSuite.scala b/repl/src/test/scala/org/apache/spark/repl/SparkShellSuite.scala index 39544beec4154..067f08cb67528 100644 --- a/repl/src/test/scala/org/apache/spark/repl/SparkShellSuite.scala +++ b/repl/src/test/scala/org/apache/spark/repl/SparkShellSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.repl import java.io._ import java.nio.charset.StandardCharsets -import java.sql.Timestamp -import java.util.Date import scala.collection.mutable.ArrayBuffer import scala.concurrent.Promise @@ -70,10 +68,9 @@ class SparkShellSuite extends SparkFunSuite { val lock = new Object def captureOutput(source: String)(line: String): Unit = lock.synchronized { - // This test suite sometimes gets extremely slow out of unknown reason on Jenkins. Here we - // add a timestamp to provide more diagnosis information. - val newLine = s"${new Timestamp(new Date().getTime)} - $source> $line" - log.info(newLine) + val newLine = s"$source> $line" + + logInfo(newLine) buffer += newLine if (line.startsWith("Spark context available") && line.contains("app id")) { @@ -82,7 +79,7 @@ class SparkShellSuite extends SparkFunSuite { // If we haven't found all expected answers and another expected answer comes up... if (next < expectedAnswers.size && line.contains(expectedAnswers(next))) { - log.info(s"$source> found expected output line $next: '${expectedAnswers(next)}'") + logInfo(s"$source> found expected output line $next: '${expectedAnswers(next)}'") next += 1 // If all expected answers have been found... 
if (next == expectedAnswers.size) { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index d3a9a9f08411c..8ba9ea28a5a96 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.hive.thriftserver import java.io._ import java.nio.charset.StandardCharsets -import java.sql.Timestamp -import java.util.Date import java.util.concurrent.CountDownLatch import scala.collection.JavaConverters._ @@ -145,11 +143,8 @@ class CliSuite extends SparkFunSuite { val lock = new Object def captureOutput(source: String)(line: String): Unit = lock.synchronized { - // This test suite sometimes gets extremely slow out of unknown reason on Jenkins. Here we - // add a timestamp to provide more diagnosis information. - val newLine = s"${new Timestamp(new Date().getTime)} - $source> $line" - log.info(newLine) - buffer += newLine + logInfo(s"$source> $line") + buffer += line if (line.startsWith("Spark master: ") && line.contains("Application Id: ")) { foundMasterAndApplicationIdMessage.trySuccess(()) From 17d283990b64614828838afa718f48b855ab7842 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Thu, 19 Oct 2023 13:57:01 +0900 Subject: [PATCH 070/521] [SPARK-45553][PS] Deprecate `assertPandasOnSparkEqual` ### What changes were proposed in this pull request? This PR proposes to deprecate `assertPandasOnSparkEqual`. ### Why are the changes needed? Now we have more pandas friendly testing utils such as `ps.testing.assert_frame_equal`, `ps.testing.assert_series_equal` and `ps.testing.assert_index_equal`. ### Does this PR introduce _any_ user-facing change? Not for now, but `assertPandasOnSparkEqual` will be removed in the future version. ### How was this patch tested? The existing CI should pass. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43426 from itholic/SPARK-45553. Authored-by: Haejoon Lee Signed-off-by: Hyukjin Kwon (cherry picked from commit f3e280b952da8b8ab6c78371f3715cc674a73bc1) Signed-off-by: Hyukjin Kwon --- python/pyspark/testing/pandasutils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/python/pyspark/testing/pandasutils.py b/python/pyspark/testing/pandasutils.py index c80ffb7ee53cb..04a523bce7640 100644 --- a/python/pyspark/testing/pandasutils.py +++ b/python/pyspark/testing/pandasutils.py @@ -365,6 +365,11 @@ def assertPandasOnSparkEqual( .. versionadded:: 3.5.0 + .. deprecated:: 3.5.1 + `assertPandasOnSparkEqual` will be removed in Spark 4.0.0. + Use `ps.testing.assert_frame_equal`, `ps.testing.assert_series_equal` + and `ps.testing.assert_index_equal` instead. + Parameters ---------- actual: pandas-on-Spark DataFrame, Series, or Index @@ -417,6 +422,12 @@ def assertPandasOnSparkEqual( >>> s2 = ps.Index([212.3, 100.0001]) >>> assertPandasOnSparkEqual(s1, s2, almost=True) # pass, ps.Index obj are almost equal """ + warnings.warn( + "`assertPandasOnSparkEqual` will be removed in Spark 4.0.0. 
" + "Use `ps.testing.assert_frame_equal`, `ps.testing.assert_series_equal` " + "and `ps.testing.assert_index_equal` instead.", + FutureWarning, + ) if actual is None and expected is None: return True elif actual is None or expected is None: From feb48dc146d8a89882875f25115af52e8295dfcc Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Thu, 19 Oct 2023 20:16:21 +0800 Subject: [PATCH 071/521] [SPARK-45543][SQL] `InferWindowGroupLimit` causes bug if the other window functions haven't the same window frame as the rank-like functions ### What changes were proposed in this pull request? https://github.com/apache/spark/pull/38799 Introduce the group limit of Window for rank-based filter to optimize top-k computation. But it causes a bug if window expressions exists non-rank function which has the window frame is not the same as `(UnboundedPreceding, CurrentRow)`. Please see the detail at https://issues.apache.org/jira/browse/SPARK-45543. ### Why are the changes needed? Fix the bug. ### Does this PR introduce _any_ user-facing change? 'Yes'. ### How was this patch tested? New test cases. ### Was this patch authored or co-authored using generative AI tooling? 'No'. Closes #43385 from beliefer/SPARK-45543. Authored-by: Jiaan Geng Signed-off-by: Jiaan Geng (cherry picked from commit d6d4e52ecc3015b41c51bc7e4e122696c76b06ee) Signed-off-by: Jiaan Geng --- .../optimizer/InferWindowGroupLimit.scala | 18 ++- .../sql/DataFrameWindowFunctionsSuite.scala | 112 ++++++++++++++++++ 2 files changed, 126 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala index 261be2914630e..04204c6a2e108 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala @@ -52,23 +52,33 @@ object InferWindowGroupLimit extends Rule[LogicalPlan] with PredicateHelper { if (limits.nonEmpty) Some(limits.min) else None } - private def support( + /** + * All window expressions should use the same expanding window, so that + * we can safely do the early stop. 
+ */ + private def isExpandingWindow( windowExpression: NamedExpression): Boolean = windowExpression match { - case Alias(WindowExpression(_: Rank | _: DenseRank | _: RowNumber, WindowSpecDefinition(_, _, + case Alias(WindowExpression(_, WindowSpecDefinition(_, _, SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))), _) => true case _ => false } + private def support(windowFunction: Expression): Boolean = windowFunction match { + case _: Rank | _: DenseRank | _: RowNumber => true + case _ => false + } + def apply(plan: LogicalPlan): LogicalPlan = { if (conf.windowGroupLimitThreshold == -1) return plan plan.transformWithPruning(_.containsAllPatterns(FILTER, WINDOW), ruleId) { case filter @ Filter(condition, window @ Window(windowExpressions, partitionSpec, orderSpec, child)) - if !child.isInstanceOf[WindowGroupLimit] && windowExpressions.exists(support) && + if !child.isInstanceOf[WindowGroupLimit] && windowExpressions.forall(isExpandingWindow) && orderSpec.nonEmpty => val limits = windowExpressions.collect { - case alias @ Alias(WindowExpression(rankLikeFunction, _), _) if support(alias) => + case alias @ Alias(WindowExpression(rankLikeFunction, _), _) + if support(rankLikeFunction) => extractLimits(condition, alias.toAttribute).map((_, rankLikeFunction)) }.flatten diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index a57e927ba8427..47380db421734 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -1521,4 +1521,116 @@ class DataFrameWindowFunctionsSuite extends QueryTest assert(windows.size === 1) } } + + test("SPARK-45543: InferWindowGroupLimit causes bug " + + "if the other window functions haven't the same window frame as the rank-like functions") { + val df = Seq( + (1, "Dave", 1, 2020), + (2, "Dave", 1, 2021), + (3, "Dave", 2, 2022), + (4, "Dave", 3, 2023), + (5, "Dave", 3, 2024), + (6, "Mark", 2, 2022), + (7, "Mark", 3, 2023), + (8, "Mark", 3, 2024), + (9, "Amy", 6, 2021), + (10, "Amy", 5, 2022), + (11, "Amy", 6, 2023), + (12, "Amy", 7, 2024), + (13, "John", 7, 2024)).toDF("id", "name", "score", "year") + + val window = Window.partitionBy($"year").orderBy($"score".desc) + val window2 = window.rowsBetween(Window.unboundedPreceding, Window.currentRow) + val window3 = window.rowsBetween(Window.unboundedPreceding, Window.unboundedFollowing) + + Seq(-1, 100).foreach { threshold => + withSQLConf(SQLConf.WINDOW_GROUP_LIMIT_THRESHOLD.key -> threshold.toString) { + // The other window functions have the same window frame as the rank-like functions. 
+ // df2, df3 and df4 can apply InferWindowGroupLimit + val df2 = df + .withColumn("rn", row_number().over(window)) + .withColumn("all_scores", collect_list($"score").over(window2)) + .sort($"year") + + checkAnswer(df2.filter("rn=1"), Seq( + Row(1, "Dave", 1, 2020, 1, Array(1)), + Row(9, "Amy", 6, 2021, 1, Array(6)), + Row(10, "Amy", 5, 2022, 1, Array(5)), + Row(11, "Amy", 6, 2023, 1, Array(6)), + Row(12, "Amy", 7, 2024, 1, Array(7)) + )) + + val df3 = df + .withColumn("rank", rank().over(window)) + .withColumn("all_scores", collect_list($"score").over(window2)) + .sort($"year") + + checkAnswer(df3.filter("rank=2"), Seq( + Row(2, "Dave", 1, 2021, 2, Array(6, 1)), + Row(3, "Dave", 2, 2022, 2, Array(5, 2)), + Row(6, "Mark", 2, 2022, 2, Array(5, 2, 2)), + Row(4, "Dave", 3, 2023, 2, Array(6, 3)), + Row(7, "Mark", 3, 2023, 2, Array(6, 3, 3)) + )) + + val df4 = df + .withColumn("rank", dense_rank().over(window)) + .withColumn("all_scores", collect_list($"score").over(window2)) + .sort($"year") + + checkAnswer(df4.filter("rank=2"), Seq( + Row(2, "Dave", 1, 2021, 2, Array(6, 1)), + Row(3, "Dave", 2, 2022, 2, Array(5, 2)), + Row(6, "Mark", 2, 2022, 2, Array(5, 2, 2)), + Row(4, "Dave", 3, 2023, 2, Array(6, 3)), + Row(7, "Mark", 3, 2023, 2, Array(6, 3, 3)), + Row(5, "Dave", 3, 2024, 2, Array(7, 7, 3)), + Row(8, "Mark", 3, 2024, 2, Array(7, 7, 3, 3)) + )) + + // The other window functions haven't the same window frame as the rank-like functions. + // df5, df6 and df7 cannot apply InferWindowGroupLimit + val df5 = df + .withColumn("rn", row_number().over(window)) + .withColumn("all_scores", collect_list($"score").over(window3)) + .sort($"year") + + checkAnswer(df5.filter("rn=1"), Seq( + Row(1, "Dave", 1, 2020, 1, Array(1)), + Row(9, "Amy", 6, 2021, 1, Array(6, 1)), + Row(10, "Amy", 5, 2022, 1, Array(5, 2, 2)), + Row(11, "Amy", 6, 2023, 1, Array(6, 3, 3)), + Row(12, "Amy", 7, 2024, 1, Array(7, 7, 3, 3)) + )) + + val df6 = df + .withColumn("rank", rank().over(window)) + .withColumn("all_scores", collect_list($"score").over(window3)) + .sort($"year") + + checkAnswer(df6.filter("rank=2"), Seq( + Row(2, "Dave", 1, 2021, 2, Array(6, 1)), + Row(3, "Dave", 2, 2022, 2, Array(5, 2, 2)), + Row(6, "Mark", 2, 2022, 2, Array(5, 2, 2)), + Row(4, "Dave", 3, 2023, 2, Array(6, 3, 3)), + Row(7, "Mark", 3, 2023, 2, Array(6, 3, 3)) + )) + + val df7 = df + .withColumn("rank", dense_rank().over(window)) + .withColumn("all_scores", collect_list($"score").over(window3)) + .sort($"year") + + checkAnswer(df7.filter("rank=2"), Seq( + Row(2, "Dave", 1, 2021, 2, Array(6, 1)), + Row(3, "Dave", 2, 2022, 2, Array(5, 2, 2)), + Row(6, "Mark", 2, 2022, 2, Array(5, 2, 2)), + Row(4, "Dave", 3, 2023, 2, Array(6, 3, 3)), + Row(7, "Mark", 3, 2023, 2, Array(6, 3, 3)), + Row(5, "Dave", 3, 2024, 2, Array(7, 7, 3, 3)), + Row(8, "Mark", 3, 2024, 2, Array(7, 7, 3, 3)) + )) + } + } + } } From f47b63c6a62fb6f1fd894f64736847719af7a199 Mon Sep 17 00:00:00 2001 From: allisonwang-db Date: Fri, 20 Oct 2023 08:36:42 +0800 Subject: [PATCH 072/521] [SPARK-45584][SQL] Fix subquery execution failure with TakeOrderedAndProjectExec This PR fixes a bug when there are subqueries in `TakeOrderedAndProjectExec`. The executeCollect method does not wait for subqueries to finish and it can result in IllegalArgumentException when executing a simple query. 
For example this query: ``` WITH t2 AS ( SELECT * FROM t1 ORDER BY id ) SELECT *, (SELECT COUNT(*) FROM t2) FROM t2 LIMIT 10 ``` will fail with this error ``` java.lang.IllegalArgumentException: requirement failed: Subquery subquery#242, [id=#109] has not finished ``` To fix a bug. No New unit test No Closes #43419 from allisonwang-db/spark-45584-subquery-failure. Authored-by: allisonwang-db Signed-off-by: Wenchen Fan (cherry picked from commit 8fd915ffaba1cc99813cc8d6d2a28688d7fae39b) Signed-off-by: Wenchen Fan --- .../apache/spark/sql/execution/limit.scala | 2 +- .../org/apache/spark/sql/SubquerySuite.scala | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala index 877f6508d963f..77135d21a26ab 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/limit.scala @@ -282,7 +282,7 @@ case class TakeOrderedAndProjectExec( projectList.map(_.toAttribute) } - override def executeCollect(): Array[InternalRow] = { + override def executeCollect(): Array[InternalRow] = executeQuery { val orderingSatisfies = SortOrder.orderingSatisfies(child.outputOrdering, sortOrder) val ord = new LazilyGeneratedOrdering(sortOrder, child.output) val limited = if (orderingSatisfies) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index d235d2a15fea3..a7a0f6156cb1d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -2712,4 +2712,28 @@ class SubquerySuite extends QueryTest expected) } } + + test("SPARK-45584: subquery execution should not fail with ORDER BY and LIMIT") { + withTable("t1") { + sql( + """ + |CREATE TABLE t1 USING PARQUET + |AS SELECT * FROM VALUES + |(1, "a"), + |(2, "a"), + |(3, "a") t(id, value) + |""".stripMargin) + val df = sql( + """ + |WITH t2 AS ( + | SELECT * FROM t1 ORDER BY id + |) + |SELECT *, (SELECT COUNT(*) FROM t2) FROM t2 LIMIT 10 + |""".stripMargin) + // This should not fail with IllegalArgumentException. + checkAnswer( + df, + Row(1, "a", 3) :: Row(2, "a", 3) :: Row(3, "a", 3) :: Nil) + } + } } From 6a3a8f5750e1db3d4dea3a9d12795764035b953a Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Fri, 20 Oct 2023 15:01:15 +0900 Subject: [PATCH 073/521] [SPARK-45553][3.5][PS] Correct warning messages ### What changes were proposed in this pull request? This followups for https://github.com/apache/spark/pull/43426. ### Why are the changes needed? To remove incorrect context from the warning message. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The existing CI should pass ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43459 from itholic/45553-remove. Authored-by: Haejoon Lee Signed-off-by: Hyukjin Kwon --- python/pyspark/testing/pandasutils.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/python/pyspark/testing/pandasutils.py b/python/pyspark/testing/pandasutils.py index 04a523bce7640..2463289d59f71 100644 --- a/python/pyspark/testing/pandasutils.py +++ b/python/pyspark/testing/pandasutils.py @@ -367,8 +367,6 @@ def assertPandasOnSparkEqual( .. deprecated:: 3.5.1 `assertPandasOnSparkEqual` will be removed in Spark 4.0.0. 
- Use `ps.testing.assert_frame_equal`, `ps.testing.assert_series_equal` - and `ps.testing.assert_index_equal` instead. Parameters ---------- @@ -423,9 +421,7 @@ def assertPandasOnSparkEqual( >>> assertPandasOnSparkEqual(s1, s2, almost=True) # pass, ps.Index obj are almost equal """ warnings.warn( - "`assertPandasOnSparkEqual` will be removed in Spark 4.0.0. " - "Use `ps.testing.assert_frame_equal`, `ps.testing.assert_series_equal` " - "and `ps.testing.assert_index_equal` instead.", + "`assertPandasOnSparkEqual` will be removed in Spark 4.0.0. ", FutureWarning, ) if actual is None and expected is None: From 6c55a6c0c680f80a6cdef7f1a83045b6400b4d09 Mon Sep 17 00:00:00 2001 From: Zamil Majdy Date: Sun, 22 Oct 2023 10:53:22 +0500 Subject: [PATCH 074/521] [SPARK-45604][SQL] Add LogicalType checking on INT64 -> DateTime conversion on Parquet Vectorized Reader ### What changes were proposed in this pull request? Currently, the read logical type is not checked while converting physical types INT64 into DateTime. One valid scenario where this can break is where the physical type is `timestamp_ntz`, and the logical type is `array`, since the logical type check does not happen, this conversion is allowed. However, the vectorized reader does not support this and will produce NPE on on-heap memory mode and SEGFAULT on off-heap memory mode. Segmentation fault on off-heap memory mode can be prevented by having an explicit boundary check on OffHeapColumnVector, but this is outside of the scope of this PR, and will be done here: https://github.com/apache/spark/pull/43452. ### Why are the changes needed? Prevent NPE or Segfault from happening. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? A new test is added in `ParquetSchemaSuite`. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43451 from majdyz/SPARK-45604. Lead-authored-by: Zamil Majdy Co-authored-by: Zamil Majdy Signed-off-by: Max Gekk (cherry picked from commit 13b67ee8cc377a5cc47d02b9addbc00eabfc8b6c) Signed-off-by: Max Gekk --- .../parquet/ParquetVectorUpdaterFactory.java | 10 +++++++-- .../parquet/ParquetSchemaSuite.scala | 21 +++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java index 15d58f0c7572a..42442cf8ea8a4 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java @@ -109,7 +109,8 @@ public ParquetVectorUpdater getUpdater(ColumnDescriptor descriptor, DataType spa // For unsigned int64, it stores as plain signed int64 in Parquet when dictionary // fallbacks. We read them as decimal values. 
return new UnsignedLongUpdater(); - } else if (isTimestampTypeMatched(LogicalTypeAnnotation.TimeUnit.MICROS)) { + } else if (isTimestamp(sparkType) && + isTimestampTypeMatched(LogicalTypeAnnotation.TimeUnit.MICROS)) { validateTimestampType(sparkType); if ("CORRECTED".equals(datetimeRebaseMode)) { return new LongUpdater(); @@ -117,7 +118,8 @@ public ParquetVectorUpdater getUpdater(ColumnDescriptor descriptor, DataType spa boolean failIfRebase = "EXCEPTION".equals(datetimeRebaseMode); return new LongWithRebaseUpdater(failIfRebase, datetimeRebaseTz); } - } else if (isTimestampTypeMatched(LogicalTypeAnnotation.TimeUnit.MILLIS)) { + } else if (isTimestamp(sparkType) && + isTimestampTypeMatched(LogicalTypeAnnotation.TimeUnit.MILLIS)) { validateTimestampType(sparkType); if ("CORRECTED".equals(datetimeRebaseMode)) { return new LongAsMicrosUpdater(); @@ -1150,6 +1152,10 @@ private static boolean isLongDecimal(DataType dt) { return false; } + private static boolean isTimestamp(DataType dt) { + return dt == DataTypes.TimestampType || dt == DataTypes.TimestampNTZType; + } + private static boolean isDecimalTypeMatched(ColumnDescriptor descriptor, DataType dt) { DecimalType d = (DecimalType) dt; LogicalTypeAnnotation typeAnnotation = descriptor.getPrimitiveType().getLogicalTypeAnnotation(); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala index facc9b90ff778..3f47c5e506ffd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaSuite.scala @@ -1087,6 +1087,27 @@ class ParquetSchemaSuite extends ParquetSchemaTest { } } + test("SPARK-45604: schema mismatch failure error on timestamp_ntz to array") { + import testImplicits._ + + withTempPath { dir => + val path = dir.getCanonicalPath + val timestamp = java.time.LocalDateTime.of(1, 2, 3, 4, 5) + val df1 = Seq((1, timestamp)).toDF() + val df2 = Seq((2, Array(timestamp))).toDF() + df1.write.mode("overwrite").parquet(s"$path/parquet") + df2.write.mode("append").parquet(s"$path/parquet") + + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") { + val e = intercept[SparkException] { + spark.read.schema(df2.schema).parquet(s"$path/parquet").collect() + } + assert(e.getCause.isInstanceOf[SparkException]) + assert(e.getCause.getCause.isInstanceOf[SchemaColumnConvertNotSupportedException]) + } + } + } + test("SPARK-40819: parquet file with TIMESTAMP(NANOS, true) (with nanosAsLong=true)") { val tsAttribute = "birthday" withSQLConf(SQLConf.LEGACY_PARQUET_NANOS_AS_LONG.key -> "true") { From 75a38b9024af3c9cfd85e916c46359f7e7315c87 Mon Sep 17 00:00:00 2001 From: Ankur Dave Date: Mon, 23 Oct 2023 10:47:42 +0800 Subject: [PATCH 075/521] [SPARK-45616][CORE] Avoid ParVector, which does not propagate ThreadLocals or SparkSession MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? `CastSuiteBase` and `ExpressionInfoSuite` use `ParVector.foreach()` to run Spark SQL queries in parallel. They incorrectly assume that each parallel operation will inherit the main thread’s active SparkSession. This is only true when these parallel operations run in freshly-created threads. 
However, when other code has already run some parallel operations before Spark was started, then there may be existing threads that do not have an active SparkSession. In that case, these tests fail with NullPointerExceptions when creating SparkPlans or running SQL queries. The fix is to use the existing method `ThreadUtils.parmap()`. This method creates fresh threads that inherit the current active SparkSession, and it propagates the Spark ThreadLocals. This PR also adds a scalastyle warning against use of ParVector. ### Why are the changes needed? This change makes `CastSuiteBase` and `ExpressionInfoSuite` less brittle to future changes that may run parallel operations during test startup. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Reproduced the test failures by running a ParVector operation before Spark starts. Verified that this PR fixes the test failures in this condition. ```scala protected override def beforeAll(): Unit = { // Run a ParVector operation before initializing the SparkSession. This starts some Scala // execution context threads that have no active SparkSession. These threads will be reused for // later ParVector operations, reproducing SPARK-45616. new ParVector((0 until 100).toVector).foreach { _ => } super.beforeAll() } ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43466 from ankurdave/SPARK-45616. Authored-by: Ankur Dave Signed-off-by: Wenchen Fan (cherry picked from commit 376de8a502fca6b46d7f21560a60024d643144ea) Signed-off-by: Wenchen Fan --- .../main/scala/org/apache/spark/rdd/UnionRDD.scala | 2 ++ .../scala/org/apache/spark/util/ThreadUtils.scala | 4 ++++ scalastyle-config.xml | 12 ++++++++++++ .../sql/catalyst/expressions/CastSuiteBase.scala | 9 ++++++--- .../org/apache/spark/sql/execution/command/ddl.scala | 2 ++ .../spark/sql/expressions/ExpressionInfoSuite.scala | 11 ++++++----- .../org/apache/spark/streaming/DStreamGraph.scala | 4 ++++ .../streaming/util/FileBasedWriteAheadLog.scala | 2 ++ 8 files changed, 38 insertions(+), 8 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala index 0a93023443704..3c1451a01850d 100644 --- a/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala @@ -76,8 +76,10 @@ class UnionRDD[T: ClassTag]( override def getPartitions: Array[Partition] = { val parRDDs = if (isPartitionListingParallel) { + // scalastyle:off parvector val parArray = new ParVector(rdds.toVector) parArray.tasksupport = UnionRDD.partitionEvalTaskSupport + // scalastyle:on parvector parArray } else { rdds diff --git a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala index 16d7de56c39eb..2d3d6ec89ffbd 100644 --- a/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/ThreadUtils.scala @@ -363,6 +363,10 @@ private[spark] object ThreadUtils { * Comparing to the map() method of Scala parallel collections, this method can be interrupted * at any time. This is useful on canceling of task execution, for example. * + * Functions are guaranteed to be executed in freshly-created threads that inherit the calling + * thread's Spark thread-local variables. These threads also inherit the calling thread's active + * SparkSession. + * * @param in - the input collection which should be transformed in parallel. 
* @param prefix - the prefix assigned to the underlying thread pool. * @param maxThreads - maximum number of thread can be created during execution. diff --git a/scalastyle-config.xml b/scalastyle-config.xml index 74e8480deaff7..0ccd937e72e88 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -227,6 +227,18 @@ This file is divided into 3 sections: ]]> + + new.*ParVector + + + (\.toUpperCase|\.toLowerCase)(?!(\(|\(Locale.ROOT\))) + ThreadUtils.parmap( + ALL_TIMEZONES, + prefix = "CastSuiteBase-cast-string-to-timestamp", + maxThreads = Runtime.getRuntime.availableProcessors + ) { zid => def checkCastStringToTimestamp(str: String, expected: Timestamp): Unit = { checkEvaluation(cast(Literal(str), TimestampType, Option(zid.getId)), expected) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index a8f7cdb260010..bb8fea71019fe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -755,8 +755,10 @@ case class RepairTableCommand( val statusPar: Seq[FileStatus] = if (partitionNames.length > 1 && statuses.length > threshold || partitionNames.length > 2) { // parallelize the list of partitions here, then we can have better parallelism later. + // scalastyle:off parvector val parArray = new ParVector(statuses.toVector) parArray.tasksupport = evalTaskSupport + // scalastyle:on parvector parArray.seq } else { statuses diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala index 4dd93983e87e3..a02137a56aacc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.sql.expressions -import scala.collection.parallel.immutable.ParVector - import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.{FunctionIdentifier, InternalRow} import org.apache.spark.sql.catalyst.expressions._ @@ -26,7 +24,7 @@ import org.apache.spark.sql.execution.HiveResult.hiveResultString import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.tags.SlowSQLTest -import org.apache.spark.util.Utils +import org.apache.spark.util.{ThreadUtils, Utils} @SlowSQLTest class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { @@ -197,8 +195,11 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { // The encrypt expression includes a random initialization vector to its encrypted result classOf[AesEncrypt].getName) - val parFuncs = new ParVector(spark.sessionState.functionRegistry.listFunction().toVector) - parFuncs.foreach { funcId => + ThreadUtils.parmap( + spark.sessionState.functionRegistry.listFunction(), + prefix = "ExpressionInfoSuite-check-outputs-of-expression-examples", + maxThreads = Runtime.getRuntime.availableProcessors + ) { funcId => // Examples can change settings. We clone the session to prevent tests clashing. val clonedSpark = spark.cloneSession() // Coalescing partitions can change result order, so disable it. 
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala index 43aaa7e1eeaec..a8f55c8b4d641 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala @@ -52,7 +52,9 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { outputStreams.foreach(_.validateAtStart()) numReceivers = inputStreams.count(_.isInstanceOf[ReceiverInputDStream[_]]) inputStreamNameAndID = inputStreams.map(is => (is.name, is.id)).toSeq + // scalastyle:off parvector new ParVector(inputStreams.toVector).foreach(_.start()) + // scalastyle:on parvector } } @@ -62,7 +64,9 @@ final private[streaming] class DStreamGraph extends Serializable with Logging { def stop(): Unit = { this.synchronized { + // scalastyle:off parvector new ParVector(inputStreams.toVector).foreach(_.stop()) + // scalastyle:on parvector } } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala index d1f9dfb791355..4e65bc75e4395 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala @@ -314,8 +314,10 @@ private[streaming] object FileBasedWriteAheadLog { val groupSize = taskSupport.parallelismLevel.max(8) source.grouped(groupSize).flatMap { group => + // scalastyle:off parvector val parallelCollection = new ParVector(group.toVector) parallelCollection.tasksupport = taskSupport + // scalastyle:on parvector parallelCollection.map(handler) }.flatten } From 06f48855d7b9e0cd0b02b6f7884af39ce1a5f68c Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Mon, 23 Oct 2023 21:12:01 +0900 Subject: [PATCH 076/521] [SPARK-45631][SS][PYSPARK] Remove @abstractmethod from onQueryIdle in PySpark StreamingQueryListener ### What changes were proposed in this pull request? Credit to anish-db for the initial investigation and the fix. This PR proposes to remove `abstractmethod` annotation from `onQueryIdle` in PySpark StreamingQueryListener. The function `onQueryIdle` was added with the annotation `abstractmethod`, which does not pick up default implementation and enforces users to implement the new method. This breaks all existing streaming query listener implementations and enforces them to add the dummy function implementation at least. This PR re-allows existing implementations to work properly without explicitly adding a new function `onQueryIdle`. ### Why are the changes needed? We broke backward compatibility in [SPARK-43183](https://issues.apache.org/jira/browse/SPARK-43183) and we want to fix it. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Modified tests. Now tests are verifying two different implementations covering old interface vs new interface. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43483 from HeartSaVioR/SPARK-45631. 
Lead-authored-by: Jungtaek Lim Co-authored-by: Anish Shrigondekar Signed-off-by: Jungtaek Lim (cherry picked from commit 75bc5ac9f2b07bc894091b8b15682ee906a19356) Signed-off-by: Jungtaek Lim --- python/pyspark/sql/streaming/listener.py | 4 +- .../streaming/test_streaming_listener.py | 117 ++++++++++++------ 2 files changed, 82 insertions(+), 39 deletions(-) diff --git a/python/pyspark/sql/streaming/listener.py b/python/pyspark/sql/streaming/listener.py index 16f40396490c7..3a0f30872dc8c 100644 --- a/python/pyspark/sql/streaming/listener.py +++ b/python/pyspark/sql/streaming/listener.py @@ -107,7 +107,9 @@ def onQueryProgress(self, event: "QueryProgressEvent") -> None: """ pass - @abstractmethod + # NOTE: Do not mark this as abstract method, since we released this abstract class without + # this method in prior version and marking this as abstract method would break existing + # implementations. def onQueryIdle(self, event: "QueryIdleEvent") -> None: """ Called when the query is idle and waiting for new data to process. diff --git a/python/pyspark/sql/tests/streaming/test_streaming_listener.py b/python/pyspark/sql/tests/streaming/test_streaming_listener.py index 87d0dae00d8bd..05c1ec71675c2 100644 --- a/python/pyspark/sql/tests/streaming/test_streaming_listener.py +++ b/python/pyspark/sql/tests/streaming/test_streaming_listener.py @@ -251,7 +251,23 @@ def test_listener_events(self): progress_event = None terminated_event = None - class TestListener(StreamingQueryListener): + # V1: Initial interface of StreamingQueryListener containing methods `onQueryStarted`, + # `onQueryProgress`, `onQueryTerminated`. It is prior to Spark 3.5. + class TestListenerV1(StreamingQueryListener): + def onQueryStarted(self, event): + nonlocal start_event + start_event = event + + def onQueryProgress(self, event): + nonlocal progress_event + progress_event = event + + def onQueryTerminated(self, event): + nonlocal terminated_event + terminated_event = event + + # V2: The interface after the method `onQueryIdle` is added. It is Spark 3.5+. 
+ class TestListenerV2(StreamingQueryListener): def onQueryStarted(self, event): nonlocal start_event start_event = event @@ -267,48 +283,71 @@ def onQueryTerminated(self, event): nonlocal terminated_event terminated_event = event - test_listener = TestListener() + def verify(test_listener): + nonlocal start_event + nonlocal progress_event + nonlocal terminated_event - try: - self.spark.streams.addListener(test_listener) + start_event = None + progress_event = None + terminated_event = None - df = self.spark.readStream.format("rate").option("rowsPerSecond", 10).load() + try: + self.spark.streams.addListener(test_listener) - # check successful stateful query - df_stateful = df.groupBy().count() # make query stateful - q = ( - df_stateful.writeStream.format("noop") - .queryName("test") - .outputMode("complete") - .start() - ) - self.assertTrue(q.isActive) - time.sleep(10) - q.stop() + df = self.spark.readStream.format("rate").option("rowsPerSecond", 10).load() - # Make sure all events are empty - self.spark.sparkContext._jsc.sc().listenerBus().waitUntilEmpty() + # check successful stateful query + df_stateful = df.groupBy().count() # make query stateful + q = ( + df_stateful.writeStream.format("noop") + .queryName("test") + .outputMode("complete") + .start() + ) + self.assertTrue(q.isActive) + time.sleep(10) + q.stop() - self.check_start_event(start_event) - self.check_progress_event(progress_event) - self.check_terminated_event(terminated_event) + # Make sure all events are empty + self.spark.sparkContext._jsc.sc().listenerBus().waitUntilEmpty() - # Check query terminated with exception - from pyspark.sql.functions import col, udf + self.check_start_event(start_event) + self.check_progress_event(progress_event) + self.check_terminated_event(terminated_event) - bad_udf = udf(lambda x: 1 / 0) - q = df.select(bad_udf(col("value"))).writeStream.format("noop").start() - time.sleep(5) - q.stop() - self.spark.sparkContext._jsc.sc().listenerBus().waitUntilEmpty() - self.check_terminated_event(terminated_event, "ZeroDivisionError") + # Check query terminated with exception + from pyspark.sql.functions import col, udf - finally: - self.spark.streams.removeListener(test_listener) + bad_udf = udf(lambda x: 1 / 0) + q = df.select(bad_udf(col("value"))).writeStream.format("noop").start() + time.sleep(5) + q.stop() + self.spark.sparkContext._jsc.sc().listenerBus().waitUntilEmpty() + self.check_terminated_event(terminated_event, "ZeroDivisionError") + + finally: + self.spark.streams.removeListener(test_listener) + + verify(TestListenerV1()) + verify(TestListenerV2()) def test_remove_listener(self): # SPARK-38804: Test StreamingQueryManager.removeListener - class TestListener(StreamingQueryListener): + # V1: Initial interface of StreamingQueryListener containing methods `onQueryStarted`, + # `onQueryProgress`, `onQueryTerminated`. It is prior to Spark 3.5. + class TestListenerV1(StreamingQueryListener): + def onQueryStarted(self, event): + pass + + def onQueryProgress(self, event): + pass + + def onQueryTerminated(self, event): + pass + + # V2: The interface after the method `onQueryIdle` is added. It is Spark 3.5+. 
+ class TestListenerV2(StreamingQueryListener): def onQueryStarted(self, event): pass @@ -321,13 +360,15 @@ def onQueryIdle(self, event): def onQueryTerminated(self, event): pass - test_listener = TestListener() + def verify(test_listener): + num_listeners = len(self.spark.streams._jsqm.listListeners()) + self.spark.streams.addListener(test_listener) + self.assertEqual(num_listeners + 1, len(self.spark.streams._jsqm.listListeners())) + self.spark.streams.removeListener(test_listener) + self.assertEqual(num_listeners, len(self.spark.streams._jsqm.listListeners())) - num_listeners = len(self.spark.streams._jsqm.listListeners()) - self.spark.streams.addListener(test_listener) - self.assertEqual(num_listeners + 1, len(self.spark.streams._jsqm.listListeners())) - self.spark.streams.removeListener(test_listener) - self.assertEqual(num_listeners, len(self.spark.streams._jsqm.listListeners())) + verify(TestListenerV1()) + verify(TestListenerV2()) def test_query_started_event_fromJson(self): start_event = """ From da08382faf101aeada757e28ebca24ffe8f5bb8e Mon Sep 17 00:00:00 2001 From: Vitalii Li Date: Tue, 24 Oct 2023 15:13:00 +0800 Subject: [PATCH 077/521] [SPARK-45430] Fix for FramelessOffsetWindowFunction when IGNORE NULLS and offset > rowCount ### What changes were proposed in this pull request? This is a fix for the failure when function that utilized `FramelessOffsetWindowFunctionFrame` is used with `ignoreNulls = true` and `offset > rowCount`. e.g. ``` select x, lead(x, 5) IGNORE NULLS over (order by x) from (select explode(sequence(1, 3)) x) ``` ### Why are the changes needed? Fix existing bug ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Modify existing unit test to cover this case ### Was this patch authored or co-authored using generative AI tooling? No Closes #43236 from vitaliili-db/SPARK-45430. 
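
For reference, a minimal DataFrame-API sketch of the same failure scenario (not part of the original patch; it assumes an active SparkSession named `spark` and uses the `functions.lead` overload that takes an `ignoreNulls` flag):

```scala
import org.apache.spark.sql.expressions.Window
import org.apache.spark.sql.functions._

// Three rows (1, 2, 3) but an offset of 5, so with IGNORE NULLS there is no row to land on.
val df = spark.range(1, 4).toDF("x")
val w = Window.orderBy("x")

// Before this fix the FrameLessOffsetWindowFunctionFrame path could fail here;
// afterwards every row should simply get a null lead value.
df.select(col("x"), lead(col("x"), 5, null, true).over(w).as("lead_x")).show()
```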
Authored-by: Vitalii Li Signed-off-by: Wenchen Fan (cherry picked from commit 32e1e58411913517c87d7e75942437f4e1c1d40e) Signed-off-by: Wenchen Fan --- .../window/WindowFunctionFrame.scala | 6 ++- .../sql/DataFrameWindowFunctionsSuite.scala | 40 ++++++++++--------- 2 files changed, 27 insertions(+), 19 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala index 2b7f702a7f20a..a849c3894f0d6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/window/WindowFunctionFrame.scala @@ -201,7 +201,11 @@ class FrameLessOffsetWindowFunctionFrame( override def prepare(rows: ExternalAppendOnlyUnsafeRowArray): Unit = { resetStates(rows) if (ignoreNulls) { - findNextRowWithNonNullInput() + if (Math.abs(offset) > rows.length) { + fillDefaultValue(EmptyRow) + } else { + findNextRowWithNonNullInput() + } } else { // drain the first few rows if offset is larger than zero while (inputIndex < offset) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala index 47380db421734..47a311c71d55d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala @@ -819,6 +819,8 @@ class DataFrameWindowFunctionsSuite extends QueryTest lead($"value", 1, null, true).over(window), lead($"value", 2, null, true).over(window), lead($"value", 3, null, true).over(window), + // offset > rowCount: SPARK-45430 + lead($"value", 100, null, true).over(window), lead(concat($"value", $"key"), 1, null, true).over(window), lag($"value", 1).over(window), lag($"value", 2).over(window), @@ -826,27 +828,29 @@ class DataFrameWindowFunctionsSuite extends QueryTest lag($"value", 1, null, true).over(window), lag($"value", 2, null, true).over(window), lag($"value", 3, null, true).over(window), + // abs(offset) > rowCount: SPARK-45430 + lag($"value", -100, null, true).over(window), lag(concat($"value", $"key"), 1, null, true).over(window)) .orderBy($"order"), Seq( - Row("a", 0, null, "x", null, null, "x", "y", "z", "xa", - null, null, null, null, null, null, null), - Row("a", 1, "x", null, null, "x", "y", "z", "v", "ya", - null, null, "x", null, null, null, null), - Row("b", 2, null, null, "y", null, "y", "z", "v", "ya", - "x", null, null, "x", null, null, "xa"), - Row("c", 3, null, "y", null, null, "y", "z", "v", "ya", - null, "x", null, "x", null, null, "xa"), - Row("a", 4, "y", null, "z", "y", "z", "v", null, "za", - null, null, "y", "x", null, null, "xa"), - Row("b", 5, null, "z", "v", null, "z", "v", null, "za", - "y", null, null, "y", "x", null, "ya"), - Row("a", 6, "z", "v", null, "z", "v", null, null, "va", - null, "y", "z", "y", "x", null, "ya"), - Row("a", 7, "v", null, null, "v", null, null, null, null, - "z", null, "v", "z", "y", "x", "za"), - Row("a", 8, null, null, null, null, null, null, null, null, - "v", "z", null, "v", "z", "y", "va"))) + Row("a", 0, null, "x", null, null, "x", "y", "z", null, "xa", + null, null, null, null, null, null, null, null), + Row("a", 1, "x", null, null, "x", "y", "z", "v", null, "ya", + null, null, "x", null, null, null, null, null), + Row("b", 2, null, null, "y", null, "y", "z", "v", null, "ya", + "x", null, null, 
"x", null, null, null, "xa"), + Row("c", 3, null, "y", null, null, "y", "z", "v", null, "ya", + null, "x", null, "x", null, null, null, "xa"), + Row("a", 4, "y", null, "z", "y", "z", "v", null, null, "za", + null, null, "y", "x", null, null, null, "xa"), + Row("b", 5, null, "z", "v", null, "z", "v", null, null, "za", + "y", null, null, "y", "x", null, null, "ya"), + Row("a", 6, "z", "v", null, "z", "v", null, null, null, "va", + null, "y", "z", "y", "x", null, null, "ya"), + Row("a", 7, "v", null, null, "v", null, null, null, null, null, + "z", null, "v", "z", "y", "x", null, "za"), + Row("a", 8, null, null, null, null, null, null, null, null, null, + "v", "z", null, "v", "z", "y", null, "va"))) } test("lag - Offset expression must be a literal") { From ddccf5add8f5aa35693c4120f0b161a74379aec9 Mon Sep 17 00:00:00 2001 From: Michael Zhang Date: Tue, 24 Oct 2023 14:51:45 +0500 Subject: [PATCH 078/521] [SPARK-45561][SQL] Add proper conversions for TINYINT in MySQLDialect ### What changes were proposed in this pull request? Change MySql Dialect to convert catalyst TINYINT into MySQL TINYINT rather than BYTE and INTEGER. BYTE does not exist in MySQL. The same applies to MsSqlServerDialect. ### Why are the changes needed? Since BYTE type does not exist in MySQL, any casts that could be pushed down involving BYTE type would fail. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT pass. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43390 from michaelzhan-db/SPARK-45561. Lead-authored-by: Michael Zhang Co-authored-by: Wenchen Fan Signed-off-by: Max Gekk (cherry picked from commit 5092c8970246eb828a31154796c3b16f0b61bddd) Signed-off-by: Max Gekk --- .../org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala | 8 +++++--- .../scala/org/apache/spark/sql/jdbc/MySQLDialect.scala | 5 ++++- .../test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 2 ++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index dc3acb66ff1f4..20fdc965874ff 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -56,10 +56,10 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { conn.prepareStatement("CREATE TABLE numbers (onebit BIT(1), tenbits BIT(10), " + "small SMALLINT, med MEDIUMINT, nor INT, big BIGINT, deci DECIMAL(40,20), flt FLOAT, " - + "dbl DOUBLE)").executeUpdate() + + "dbl DOUBLE, tiny TINYINT)").executeUpdate() conn.prepareStatement("INSERT INTO numbers VALUES (b'0', b'1000100101', " + "17, 77777, 123456789, 123456789012345, 123456789012345.123456789012345, " - + "42.75, 1.0000000000000002)").executeUpdate() + + "42.75, 1.0000000000000002, -128)").executeUpdate() conn.prepareStatement("CREATE TABLE dates (d DATE, t TIME, dt DATETIME, ts TIMESTAMP, " + "yr YEAR)").executeUpdate() @@ -89,7 +89,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { val rows = df.collect() assert(rows.length == 1) val types = rows(0).toSeq.map(x => x.getClass.toString) - assert(types.length == 9) + assert(types.length == 10) assert(types(0).equals("class java.lang.Boolean")) assert(types(1).equals("class java.lang.Long")) 
assert(types(2).equals("class java.lang.Integer")) @@ -99,6 +99,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { assert(types(6).equals("class java.math.BigDecimal")) assert(types(7).equals("class java.lang.Double")) assert(types(8).equals("class java.lang.Double")) + assert(types(9).equals("class java.lang.Byte")) assert(rows(0).getBoolean(0) == false) assert(rows(0).getLong(1) == 0x225) assert(rows(0).getInt(2) == 17) @@ -109,6 +110,7 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { assert(rows(0).getAs[BigDecimal](6).equals(bd)) assert(rows(0).getDouble(7) == 42.75) assert(rows(0).getDouble(8) == 1.0000000000000002) + assert(rows(0).getByte(9) == 0x80.toByte) } test("Date types") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index a08c89318b660..c7e14cc78d5bf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.connector.catalog.index.TableIndex import org.apache.spark.sql.connector.expressions.{Expression, FieldReference, NamedReference, NullOrdering, SortDirection} import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} -import org.apache.spark.sql.types.{BooleanType, DataType, FloatType, LongType, MetadataBuilder, StringType} +import org.apache.spark.sql.types.{BooleanType, ByteType, DataType, FloatType, LongType, MetadataBuilder, StringType} private case object MySQLDialect extends JdbcDialect with SQLConfHelper { @@ -102,6 +102,8 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { // Some MySQL JDBC drivers converts JSON type into Types.VARCHAR with a precision of -1. // Explicitly converts it into StringType here. 
Some(StringType) + } else if (sqlType == Types.TINYINT && typeName.equals("TINYINT")) { + Some(ByteType) } else None } @@ -184,6 +186,7 @@ private case object MySQLDialect extends JdbcDialect with SQLConfHelper { // We override getJDBCType so that FloatType is mapped to FLOAT instead case FloatType => Option(JdbcType("FLOAT", java.sql.Types.FLOAT)) case StringType => Option(JdbcType("LONGTEXT", java.sql.Types.LONGVARCHAR)) + case ByteType => Option(JdbcType("TINYINT", java.sql.Types.TINYINT)) case _ => JdbcUtils.getCommonJDBCType(dt) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index eae171e20b706..71c7245b06090 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -914,6 +914,8 @@ class JDBCSuite extends QueryTest with SharedSparkSession { assert(mySqlDialect.getCatalystType(java.sql.Types.VARBINARY, "BIT", 1, metadata) == None) assert(mySqlDialect.getCatalystType(java.sql.Types.BIT, "TINYINT", 1, metadata) == Some(BooleanType)) + assert(mySqlDialect.getCatalystType(java.sql.Types.TINYINT, "TINYINT", 1, metadata) == + Some(ByteType)) } test("SPARK-35446: MySQLDialect type mapping of float") { From 26f6663d76849e1aa16833398082ba9b4a2e73af Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 25 Oct 2023 15:59:00 +0900 Subject: [PATCH 079/521] [SPARK-45588][SPARK-45640][SQL][TESTS][3.5] Fix flaky ProtobufCatalystDataConversionSuite ### What changes were proposed in this pull request? The pr aims to fix flaky ProtobufCatalystDataConversionSuite, include: - Fix the type check (when the random value was empty array, we didn't skip it. Original intention is to skip default values for types.) [SPARK-45588] - When data.get(0) is null, data.get(0).asInstanceOf[Array[Byte]].isEmpty will be thrown java.lang.NullPointerException. [SPARK-45640] Backport above to branch 3.5. Master branch pr: https://github.com/apache/spark/pull/43424 & https://github.com/apache/spark/pull/43493 ### Why are the changes needed? Fix flaky ProtobufCatalystDataConversionSuite. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Pass GA - Manually test ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43521 from panbingkun/branch-3.5_SPARK-45640. 
Authored-by: panbingkun Signed-off-by: Hyukjin Kwon --- .../sql/protobuf/ProtobufCatalystDataConversionSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufCatalystDataConversionSuite.scala b/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufCatalystDataConversionSuite.scala index d3e63a11a66bf..b7f17fece5fa6 100644 --- a/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufCatalystDataConversionSuite.scala +++ b/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufCatalystDataConversionSuite.scala @@ -137,7 +137,8 @@ class ProtobufCatalystDataConversionSuite while ( data != null && (data.get(0) == defaultValue || - (dt == BinaryType && + (dt.fields(0).dataType == BinaryType && + data.get(0) != null && data.get(0).asInstanceOf[Array[Byte]].isEmpty))) data = generator().asInstanceOf[Row] From 9e4411e2450d0503933626207b5e03308c30bc72 Mon Sep 17 00:00:00 2001 From: Paul Staab Date: Wed, 25 Oct 2023 07:36:15 -0500 Subject: [PATCH 080/521] [SPARK-40154][PYTHON][DOCS] Correct storage level in Dataframe.cache docstring ### What changes were proposed in this pull request? Corrects the docstring `DataFrame.cache` to give the correct storage level after it changed with Spark 3.0. It seems that the docstring of `DataFrame.persist` was updated, but `cache` was forgotten. ### Why are the changes needed? The doctoring claims that `cache` uses serialised storage, but it actually uses deserialised storage. I confirmed that this is still the case with Spark 3.5.0 using the example code from the Jira ticket. ### Does this PR introduce _any_ user-facing change? Yes, the docstring changes. ### How was this patch tested? The Github actions workflow succeeded. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43229 from paulstaab/SPARK-40154. Authored-by: Paul Staab Signed-off-by: Sean Owen (cherry picked from commit 94607dd001b133a25dc9865f25b3f9e7f5a5daa3) Signed-off-by: Sean Owen --- python/pyspark/sql/dataframe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 30ed73d3c47b0..5707ae2a31fec 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -1485,7 +1485,7 @@ def foreachPartition(self, f: Callable[[Iterator[Row]], None]) -> None: self.rdd.foreachPartition(f) # type: ignore[arg-type] def cache(self) -> "DataFrame": - """Persists the :class:`DataFrame` with the default storage level (`MEMORY_AND_DISK`). + """Persists the :class:`DataFrame` with the default storage level (`MEMORY_AND_DISK_DESER`). .. versionadded:: 1.3.0 @@ -1494,7 +1494,7 @@ def cache(self) -> "DataFrame": Notes ----- - The default storage level has changed to `MEMORY_AND_DISK` to match Scala in 2.0. + The default storage level has changed to `MEMORY_AND_DISK_DESER` to match Scala in 3.0. Returns ------- From 1b77c6556060a921af2158896d2693c08e622624 Mon Sep 17 00:00:00 2001 From: Chao Sun Date: Thu, 26 Oct 2023 13:53:30 -0700 Subject: [PATCH 081/521] [SPARK-45652][SQL][3.5] SPJ: Handle empty input partitions after dynamic filtering This is a cherry-pick of https://github.com/apache/spark/pull/43531 to branch-3.5, with a few modifications. ### What changes were proposed in this pull request? Handle the case when input partitions become empty after V2 dynamic filtering, when SPJ is enabled. ### Why are the changes needed? 
Current in the situation when all input partitions are filtered out via dynamic filtering, SPJ doesn't work but instead will panic: ``` java.util.NoSuchElementException: None.get at scala.None$.get(Option.scala:529) at scala.None$.get(Option.scala:527) at org.apache.spark.sql.execution.datasources.v2.BatchScanExec.filteredPartitions$lzycompute(BatchScanExec.scala:108) at org.apache.spark.sql.execution.datasources.v2.BatchScanExec.filteredPartitions(BatchScanExec.scala:65) at org.apache.spark.sql.execution.datasources.v2.BatchScanExec.inputRDD$lzycompute(BatchScanExec.scala:136) at org.apache.spark.sql.execution.datasources.v2.BatchScanExec.inputRDD(BatchScanExec.scala:135) at org.apache.spark.sql.boson.BosonBatchScanExec.inputRDD$lzycompute(BosonBatchScanExec.scala:28) ``` This is because the `groupPartitions` method will return `None` in this scenario. We should handle the case. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a test case for this. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43540 from sunchao/SPARK-45652-branch-3.5. Authored-by: Chao Sun Signed-off-by: Dongjoon Hyun --- .../datasources/v2/BatchScanExec.scala | 4 +- .../KeyGroupedPartitioningSuite.scala | 42 +++++++++++++++++++ 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala index eba3c71f871e3..2a3a5cdeb82b8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExec.scala @@ -101,7 +101,7 @@ case class BatchScanExec( "partition values that are not present in the original partitioning.") } - groupPartitions(newPartitions).get.map(_._2) + groupPartitions(newPartitions).getOrElse(Seq.empty).map(_._2) case _ => // no validation is needed as the data source did not report any specific partitioning @@ -145,7 +145,7 @@ case class BatchScanExec( "is enabled") val groupedPartitions = groupPartitions(finalPartitions.map(_.head), - groupSplits = true).get + groupSplits = true).getOrElse(Seq.empty) // This means the input partitions are not grouped by partition values. 
We'll need to // check `groupByPartitionValues` and decide whether to group and replicate splits diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala index 8461f528277c3..6b07c77aefb60 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/KeyGroupedPartitioningSuite.scala @@ -1095,4 +1095,46 @@ class KeyGroupedPartitioningSuite extends DistributionAndOrderingSuiteBase { } } } + + test("SPARK-45652: SPJ should handle empty partition after dynamic filtering") { + withSQLConf( + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false", + SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false", + SQLConf.DYNAMIC_PARTITION_PRUNING_FALLBACK_FILTER_RATIO.key -> "10") { + val items_partitions = Array(identity("id")) + createTable(items, items_schema, items_partitions) + sql(s"INSERT INTO testcat.ns.$items VALUES " + + s"(1, 'aa', 40.0, cast('2020-01-01' as timestamp)), " + + s"(1, 'aa', 41.0, cast('2020-01-15' as timestamp)), " + + s"(2, 'bb', 10.0, cast('2020-01-01' as timestamp)), " + + s"(2, 'bb', 10.5, cast('2020-01-01' as timestamp)), " + + s"(3, 'cc', 15.5, cast('2020-02-01' as timestamp))") + + val purchases_partitions = Array(identity("item_id")) + createTable(purchases, purchases_schema, purchases_partitions) + sql(s"INSERT INTO testcat.ns.$purchases VALUES " + + s"(1, 42.0, cast('2020-01-01' as timestamp)), " + + s"(1, 44.0, cast('2020-01-15' as timestamp)), " + + s"(1, 45.0, cast('2020-01-15' as timestamp)), " + + s"(2, 11.0, cast('2020-01-01' as timestamp)), " + + s"(3, 19.5, cast('2020-02-01' as timestamp))") + + Seq(true, false).foreach { pushDownValues => + Seq(true, false).foreach { partiallyClustered => { + withSQLConf( + SQLConf.V2_BUCKETING_PARTIALLY_CLUSTERED_DISTRIBUTION_ENABLED.key -> + partiallyClustered.toString, + SQLConf.V2_BUCKETING_PUSH_PART_VALUES_ENABLED.key -> pushDownValues.toString) { + // The dynamic filtering effectively filtered out all the partitions + val df = sql(s"SELECT p.price from testcat.ns.$items i, testcat.ns.$purchases p " + + "WHERE i.id = p.item_id AND i.price > 50.0") + checkAnswer(df, Seq.empty) + } + } + } + } + } + } } From e2b92b8812ce6ce590eb5ef8f2661cd73547c6a9 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Fri, 27 Oct 2023 15:25:59 +0900 Subject: [PATCH 082/521] [SPARK-45670][CORE][3.5] SparkSubmit does not support `--total-executor-cores` when deploying on K8s This is the cherry-pick of https://github.com/apache/spark/pull/43536 for branch-3.5 ### What changes were proposed in this pull request? Remove Kubernetes from the support list of `--total-executor-cores` in SparkSubmit ### Why are the changes needed? `--total-executor-cores` does not take effect in Spark on K8s, [the comments from original PR](https://github.com/apache/spark/pull/19717#discussion_r154568773) also proves that ### Does this PR introduce _any_ user-facing change? The output of `spark-submit --help` changed ```patch ... - Spark standalone, Mesos and Kubernetes only: + Spark standalone and Mesos only: --total-executor-cores NUM Total cores for all executors. ... ``` ### How was this patch tested? Pass GA and review. ### Was this patch authored or co-authored using generative AI tooling? 
No Closes #43550 from pan3793/SPARK-45670-3.5. Authored-by: Cheng Pan Signed-off-by: Hyukjin Kwon --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 +- .../scala/org/apache/spark/deploy/SparkSubmitArguments.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 60253ed5fda1f..af35f451e3704 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -683,7 +683,7 @@ private[spark] class SparkSubmit extends Logging { confKey = EXECUTOR_CORES.key), OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN | KUBERNETES, ALL_DEPLOY_MODES, confKey = EXECUTOR_MEMORY.key), - OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, + OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES, confKey = CORES_MAX.key), OptionAssigner(args.files, LOCAL | STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, confKey = FILES.key), diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index a3fe5153bee9f..93dd25db0937b 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -567,7 +567,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S | --kill SUBMISSION_ID If given, kills the driver specified. | --status SUBMISSION_ID If given, requests the status of the driver specified. | - | Spark standalone, Mesos and Kubernetes only: + | Spark standalone and Mesos only: | --total-executor-cores NUM Total cores for all executors. | | Spark standalone, YARN and Kubernetes only: From d393b50e6d5e64976747c9e84e3787366dbe4280 Mon Sep 17 00:00:00 2001 From: zeruibao Date: Fri, 27 Oct 2023 14:45:33 +0800 Subject: [PATCH 083/521] [SPARK-43380][SQL] Fix slowdown in Avro read ### What changes were proposed in this pull request? Fix slowdown in Avro read. There is a https://github.com/apache/spark/pull/42503 causes the performance regression. It seems that creating an `AvroOptions` inside `toSqlType` is very expensive. Try to pass this in the callstack. After regression ![image-20231024-193909](https://github.com/apache/spark/assets/125398515/c6af9376-e058-4da9-8f63-d9e8663b36ef) Before regression ![image-20231024-193650](https://github.com/apache/spark/assets/125398515/fd609c05-accb-4ce8-8020-2866328a52f7) ### Why are the changes needed? Need to fix the performance regression of Avro read. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT test ### Was this patch authored or co-authored using generative AI tooling? No Closes #43530 from zeruibao/SPARK-4380-real-fix-regression-2. 
Lead-authored-by: zeruibao Co-authored-by: Zerui Bao <125398515+zeruibao@users.noreply.github.com> Signed-off-by: Wenchen Fan (cherry picked from commit 7d94c5769a8b95a2811e73527fa6ea60f9087901) Signed-off-by: Wenchen Fan --- .../spark/sql/avro/AvroDataToCatalyst.scala | 3 +- .../spark/sql/avro/AvroDeserializer.scala | 11 ++++--- .../spark/sql/avro/AvroFileFormat.scala | 3 +- .../spark/sql/avro/SchemaConverters.scala | 32 ++++++++++++------- .../v2/avro/AvroPartitionReaderFactory.scala | 3 +- .../AvroCatalystDataConversionSuite.scala | 7 ++-- .../spark/sql/avro/AvroRowReaderSuite.scala | 3 +- .../spark/sql/avro/AvroSerdeSuite.scala | 3 +- .../org/apache/spark/sql/avro/AvroSuite.scala | 2 +- 9 files changed, 43 insertions(+), 24 deletions(-) diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala index 59f2999bdd395..2c2a45fc3f14f 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala @@ -61,7 +61,8 @@ private[sql] case class AvroDataToCatalyst( @transient private lazy val reader = new GenericDatumReader[Any](actualSchema, expectedSchema) @transient private lazy val deserializer = - new AvroDeserializer(expectedSchema, dataType, avroOptions.datetimeRebaseModeInRead) + new AvroDeserializer(expectedSchema, dataType, + avroOptions.datetimeRebaseModeInRead, avroOptions.useStableIdForUnionType) @transient private var decoder: BinaryDecoder = _ diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala index e82116eec1e9c..fe0bd7392b636 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -49,18 +49,21 @@ private[sql] class AvroDeserializer( rootCatalystType: DataType, positionalFieldMatch: Boolean, datetimeRebaseSpec: RebaseSpec, - filters: StructFilters) { + filters: StructFilters, + useStableIdForUnionType: Boolean) { def this( rootAvroType: Schema, rootCatalystType: DataType, - datetimeRebaseMode: String) = { + datetimeRebaseMode: String, + useStableIdForUnionType: Boolean) = { this( rootAvroType, rootCatalystType, positionalFieldMatch = false, RebaseSpec(LegacyBehaviorPolicy.withName(datetimeRebaseMode)), - new NoopFilters) + new NoopFilters, + useStableIdForUnionType) } private lazy val decimalConversions = new DecimalConversion() @@ -118,7 +121,7 @@ private[sql] class AvroDeserializer( val incompatibleMsg = errorPrefix + s"schema is incompatible (avroType = $avroType, sqlType = ${catalystType.sql})" - val realDataType = SchemaConverters.toSqlType(avroType).dataType + val realDataType = SchemaConverters.toSqlType(avroType, useStableIdForUnionType).dataType val confKey = SQLConf.LEGACY_AVRO_ALLOW_INCOMPATIBLE_SCHEMA val preventReadingIncorrectType = !SQLConf.get.getConf(confKey) diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala index 53562a3afdb5b..7b0292df43c2f 100755 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala @@ -141,7 +141,8 @@ private[sql] class AvroFileFormat extends 
FileFormat requiredSchema, parsedOptions.positionalFieldMatching, datetimeRebaseMode, - avroFilters) + avroFilters, + parsedOptions.useStableIdForUnionType) override val stopPosition = file.start + file.length override def hasNext: Boolean = hasNextRow diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala index 6f21639e28d68..06abe977e3b08 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala @@ -46,16 +46,24 @@ object SchemaConverters { */ case class SchemaType(dataType: DataType, nullable: Boolean) + /** + * Converts an Avro schema to a corresponding Spark SQL schema. + * + * @since 4.0.0 + */ + def toSqlType(avroSchema: Schema, useStableIdForUnionType: Boolean): SchemaType = { + toSqlTypeHelper(avroSchema, Set.empty, useStableIdForUnionType) + } /** * Converts an Avro schema to a corresponding Spark SQL schema. * * @since 2.4.0 */ def toSqlType(avroSchema: Schema): SchemaType = { - toSqlTypeHelper(avroSchema, Set.empty, AvroOptions(Map())) + toSqlType(avroSchema, false) } def toSqlType(avroSchema: Schema, options: Map[String, String]): SchemaType = { - toSqlTypeHelper(avroSchema, Set.empty, AvroOptions(options)) + toSqlTypeHelper(avroSchema, Set.empty, AvroOptions(options).useStableIdForUnionType) } // The property specifies Catalyst type of the given field @@ -64,7 +72,7 @@ object SchemaConverters { private def toSqlTypeHelper( avroSchema: Schema, existingRecordNames: Set[String], - avroOptions: AvroOptions): SchemaType = { + useStableIdForUnionType: Boolean): SchemaType = { avroSchema.getType match { case INT => avroSchema.getLogicalType match { case _: Date => SchemaType(DateType, nullable = false) @@ -117,7 +125,7 @@ object SchemaConverters { } val newRecordNames = existingRecordNames + avroSchema.getFullName val fields = avroSchema.getFields.asScala.map { f => - val schemaType = toSqlTypeHelper(f.schema(), newRecordNames, avroOptions) + val schemaType = toSqlTypeHelper(f.schema(), newRecordNames, useStableIdForUnionType) StructField(f.name, schemaType.dataType, schemaType.nullable) } @@ -127,13 +135,14 @@ object SchemaConverters { val schemaType = toSqlTypeHelper( avroSchema.getElementType, existingRecordNames, - avroOptions) + useStableIdForUnionType) SchemaType( ArrayType(schemaType.dataType, containsNull = schemaType.nullable), nullable = false) case MAP => - val schemaType = toSqlTypeHelper(avroSchema.getValueType, existingRecordNames, avroOptions) + val schemaType = toSqlTypeHelper(avroSchema.getValueType, + existingRecordNames, useStableIdForUnionType) SchemaType( MapType(StringType, schemaType.dataType, valueContainsNull = schemaType.nullable), nullable = false) @@ -143,17 +152,18 @@ object SchemaConverters { // In case of a union with null, eliminate it and make a recursive call val remainingUnionTypes = AvroUtils.nonNullUnionBranches(avroSchema) if (remainingUnionTypes.size == 1) { - toSqlTypeHelper(remainingUnionTypes.head, existingRecordNames, avroOptions) + toSqlTypeHelper(remainingUnionTypes.head, existingRecordNames, useStableIdForUnionType) .copy(nullable = true) } else { toSqlTypeHelper( Schema.createUnion(remainingUnionTypes.asJava), existingRecordNames, - avroOptions).copy(nullable = true) + useStableIdForUnionType).copy(nullable = true) } } else avroSchema.getTypes.asScala.map(_.getType).toSeq match { case Seq(t1) => - 
toSqlTypeHelper(avroSchema.getTypes.get(0), existingRecordNames, avroOptions) + toSqlTypeHelper(avroSchema.getTypes.get(0), + existingRecordNames, useStableIdForUnionType) case Seq(t1, t2) if Set(t1, t2) == Set(INT, LONG) => SchemaType(LongType, nullable = false) case Seq(t1, t2) if Set(t1, t2) == Set(FLOAT, DOUBLE) => @@ -167,9 +177,9 @@ object SchemaConverters { val fieldNameSet : mutable.Set[String] = mutable.Set() val fields = avroSchema.getTypes.asScala.zipWithIndex.map { case (s, i) => - val schemaType = toSqlTypeHelper(s, existingRecordNames, avroOptions) + val schemaType = toSqlTypeHelper(s, existingRecordNames, useStableIdForUnionType) - val fieldName = if (avroOptions.useStableIdForUnionType) { + val fieldName = if (useStableIdForUnionType) { // Avro's field name may be case sensitive, so field names for two named type // could be "a" and "A" and we need to distinguish them. In this case, we throw // an exception. diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala index cc7bd180e8477..2c85c1b067392 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala @@ -103,7 +103,8 @@ case class AvroPartitionReaderFactory( readDataSchema, options.positionalFieldMatching, datetimeRebaseMode, - avroFilters) + avroFilters, + options.useStableIdForUnionType) override val stopPosition = partitionedFile.start + partitionedFile.length override def next(): Boolean = hasNextRow diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala index 1cb34a0bc4dc5..250b5e0615ad8 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala @@ -59,7 +59,7 @@ class AvroCatalystDataConversionSuite extends SparkFunSuite val expected = { val avroSchema = new Schema.Parser().parse(schema) - SchemaConverters.toSqlType(avroSchema).dataType match { + SchemaConverters.toSqlType(avroSchema, false).dataType match { case st: StructType => Row.fromSeq((0 until st.length).map(_ => null)) case _ => null } @@ -281,13 +281,14 @@ class AvroCatalystDataConversionSuite extends SparkFunSuite data: GenericData.Record, expected: Option[Any], filters: StructFilters = new NoopFilters): Unit = { - val dataType = SchemaConverters.toSqlType(schema).dataType + val dataType = SchemaConverters.toSqlType(schema, false).dataType val deserializer = new AvroDeserializer( schema, dataType, false, RebaseSpec(LegacyBehaviorPolicy.CORRECTED), - filters) + filters, + false) val deserialized = deserializer.deserialize(data) expected match { case None => assert(deserialized == None) diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala index 70d0bc6c0ad10..965e3a0c1cba6 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala @@ -75,7 +75,8 @@ class AvroRowReaderSuite StructType(new StructField("value", IntegerType, true) :: Nil), 
false, RebaseSpec(CORRECTED), - new NoopFilters) + new NoopFilters, + false) override val stopPosition = fileSize override def hasNext: Boolean = hasNextRow diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala index 7f99f3c737c86..a21f3f008fdc7 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala @@ -226,7 +226,8 @@ object AvroSerdeSuite { sql, isPositional(matchType), RebaseSpec(CORRECTED), - new NoopFilters) + new NoopFilters, + false) } /** diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala index ffb0a49641b59..1df99210a55ac 100644 --- a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala +++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala @@ -2137,7 +2137,7 @@ abstract class AvroSuite private def checkSchemaWithRecursiveLoop(avroSchema: String): Unit = { val message = intercept[IncompatibleSchemaException] { - SchemaConverters.toSqlType(new Schema.Parser().parse(avroSchema)) + SchemaConverters.toSqlType(new Schema.Parser().parse(avroSchema), false) }.getMessage assert(message.contains("Found recursive reference in Avro schema")) From de66d8f2ec78525ea73eba8a0d8fa6e4d4839ead Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 27 Oct 2023 21:20:40 +0900 Subject: [PATCH 084/521] [SPARK-45706][PYTHON][DOCS] Makes entire Binder build fails fast during setting up ### What changes were proposed in this pull request? This PR proposes to make entire Binder build fails fast during setting up to prevent the Binder image to be successfully built which it cannot be rebuilt later on the same commit. ### Why are the changes needed? Binder build is currently broken for Spark 3.5.0: https://mybinder.org/v2/gh/apache/spark/ce5ddad9903?filepath=python%2Fdocs%2Fsource%2Fgetting_started%2Fquickstart_df.ipynb Seems like we uploaded PySpark late into PyPI, and the installation steps just slightly ignored the failure (a user triggered the first docker image for Binder, and that's being reused at that time PySpark wasn't uploaded to PyPI). ![Screenshot 2023-10-27 at 5 42 26 PM](https://github.com/apache/spark/assets/6477701/9030e4a1-2afa-43a2-aee0-dda01abb46ce) ### Does this PR introduce _any_ user-facing change? Yes, it fixes the user-facing live notebooks (at https://spark.apache.org/docs/latest/api/python/index.html). ### How was this patch tested? Manually tested in my fork: - https://mybinder.org/v2/gh/HyukjinKwon/spark/ce5ddad9903?filepath=python%2Fdocs%2Fsource%2Fgetting_started%2Fquickstart_df.ipynb - https://mybinder.org/v2/gh/HyukjinKwon/spark/ce5ddad9903?filepath=python%2Fdocs%2Fsource%2Fgetting_started%2Fquickstart_connect.ipynb - https://mybinder.org/v2/gh/HyukjinKwon/spark/ce5ddad9903?filepath=python%2Fdocs%2Fsource%2Fgetting_started%2Fquickstart_ps.ipynb ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43553 from HyukjinKwon/SPARK-45706. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit f1d1dc1f87a7e4accd1c3c2d824f39df05465906) Signed-off-by: Hyukjin Kwon --- binder/postBuild | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/binder/postBuild b/binder/postBuild index 70ae23b393707..fec233f8c8ce9 100644 --- a/binder/postBuild +++ b/binder/postBuild @@ -20,6 +20,11 @@ # This file is used for Binder integration to install PySpark available in # Jupyter notebook. +# SPARK-45706: Should fail fast. Otherwise, the Binder image is successfully +# built, and it cannot be rebuilt. +set -o pipefail +set -e + VERSION=$(python -c "exec(open('python/pyspark/version.py').read()); print(__version__)") TAG=$(git describe --tags --exact-match 2>/dev/null) From 238630e3aca7c146201f9032a8294a06e7ee3dbd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 27 Oct 2023 19:21:04 -0700 Subject: [PATCH 085/521] [SPARK-45678][CORE] Cover BufferReleasingInputStream.available/reset under tryOrFetchFailedException ### What changes were proposed in this pull request? This patch proposes to wrap `BufferReleasingInputStream.available/reset` under `tryOrFetchFailedException`. So `IOException` during `available`/`reset` call will be rethrown as `FetchFailedException`. ### Why are the changes needed? We have encountered shuffle data corruption issue: ``` Caused by: java.io.IOException: FAILED_TO_UNCOMPRESS(5) at org.xerial.snappy.SnappyNative.throw_error(SnappyNative.java:112) at org.xerial.snappy.SnappyNative.rawUncompress(Native Method) at org.xerial.snappy.Snappy.rawUncompress(Snappy.java:504) at org.xerial.snappy.Snappy.uncompress(Snappy.java:543) at org.xerial.snappy.SnappyInputStream.hasNextChunk(SnappyInputStream.java:450) at org.xerial.snappy.SnappyInputStream.available(SnappyInputStream.java:497) at org.apache.spark.storage.BufferReleasingInputStream.available(ShuffleBlockFetcherIterator.scala:1356) ``` Spark shuffle has capacity to detect corruption for a few stream op like `read` and `skip`, such `IOException` in the stack trace will be rethrown as `FetchFailedException` that will re-try the failed shuffle task. But in the stack trace it is `available` that is not covered by the mechanism. So no-retry has been happened and the Spark application just failed. As the `available`/`reset` op will also involve data decompression and throw `IOException`, we should be able to check it like `read` and `skip` do. ### Does this PR introduce _any_ user-facing change? Yes. Data corruption during `available`/`reset` op is now causing `FetchFailedException` like `read` and `skip` that can be retried instead of `IOException`. ### How was this patch tested? Added test. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43543 from viirya/add_available. 
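
As an illustration only, a simplified, self-contained sketch of the wrap-and-rethrow pattern this change extends to `available()` and `reset()` (not the actual Spark class; the real `BufferReleasingInputStream` rethrows a `FetchFailedException` that carries the block's address and id):

```scala
import java.io.{IOException, InputStream}

// Delegating stream that converts IOExceptions from the wrapped stream into a
// caller-supplied "fetch failed" error, closing the delegate first.
class RethrowingInputStream(delegate: InputStream, asFetchFailure: IOException => Nothing)
  extends InputStream {

  private def tryOrRethrow[T](block: => T): T =
    try block catch {
      case e: IOException =>
        try delegate.close() catch { case _: IOException => () }
        asFetchFailure(e)
    }

  override def read(): Int = tryOrRethrow(delegate.read())
  override def read(b: Array[Byte], off: Int, len: Int): Int =
    tryOrRethrow(delegate.read(b, off, len))
  override def skip(n: Long): Long = tryOrRethrow(delegate.skip(n))

  // SPARK-45678: available() and reset() may also decompress data and hit a corrupt
  // block, so they are routed through the same wrapper instead of plain delegation.
  override def available(): Int = tryOrRethrow(delegate.available())
  override def reset(): Unit = tryOrRethrow(delegate.reset())
}
```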
Authored-by: Liang-Chi Hsieh Signed-off-by: Chao Sun --- .../storage/ShuffleBlockFetcherIterator.scala | 8 ++- .../ShuffleBlockFetcherIteratorSuite.scala | 64 ++++++++++++++++++- 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala index b21a2aa1c1791..b9365f45a11ae 100644 --- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala @@ -1354,7 +1354,8 @@ private class BufferReleasingInputStream( } } - override def available(): Int = delegate.available() + override def available(): Int = + tryOrFetchFailedException(delegate.available()) override def mark(readlimit: Int): Unit = delegate.mark(readlimit) @@ -1369,12 +1370,13 @@ private class BufferReleasingInputStream( override def read(b: Array[Byte], off: Int, len: Int): Int = tryOrFetchFailedException(delegate.read(b, off, len)) - override def reset(): Unit = delegate.reset() + override def reset(): Unit = tryOrFetchFailedException(delegate.reset()) /** * Execute a block of code that returns a value, close this stream quietly and re-throwing * IOException as FetchFailedException when detectCorruption is true. This method is only - * used by the `read` and `skip` methods inside `BufferReleasingInputStream` currently. + * used by the `available`, `read` and `skip` methods inside `BufferReleasingInputStream` + * currently. */ private def tryOrFetchFailedException[T](block: => T): T = { try { diff --git a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala index af37a72c9e3f8..f2d5f27a66cce 100644 --- a/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/ShuffleBlockFetcherIteratorSuite.scala @@ -182,6 +182,7 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT blocksByAddress: Map[BlockManagerId, Seq[(BlockId, Long, Int)]], taskContext: Option[TaskContext] = None, streamWrapperLimitSize: Option[Long] = None, + corruptAtAvailableReset: Boolean = false, blockManager: Option[BlockManager] = None, maxBytesInFlight: Long = Long.MaxValue, maxReqsInFlight: Int = Int.MaxValue, @@ -201,7 +202,14 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT blockManager.getOrElse(createMockBlockManager()), mapOutputTracker, blocksByAddress.iterator, - (_, in) => streamWrapperLimitSize.map(new LimitedInputStream(in, _)).getOrElse(in), + (_, in) => { + val limited = streamWrapperLimitSize.map(new LimitedInputStream(in, _)).getOrElse(in) + if (corruptAtAvailableReset) { + new CorruptAvailableResetStream(limited) + } else { + limited + } + }, maxBytesInFlight, maxReqsInFlight, maxBlocksInFlightPerAddress, @@ -712,6 +720,16 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT corruptBuffer } + private class CorruptAvailableResetStream(in: InputStream) extends InputStream { + override def read(): Int = in.read() + + override def read(dest: Array[Byte], off: Int, len: Int): Int = in.read(dest, off, len) + + override def available(): Int = throw new IOException("corrupt at available") + + override def reset(): Unit = throw new IOException("corrupt at reset") + } + private class CorruptStream(corruptAt: Long 
= 0L) extends InputStream { var pos = 0 var closed = false @@ -1879,4 +1897,48 @@ class ShuffleBlockFetcherIteratorSuite extends SparkFunSuite with PrivateMethodT blockManager = Some(blockManager), streamWrapperLimitSize = Some(100)) verifyLocalBlocksFromFallback(iterator) } + + test("SPARK-45678: retry corrupt blocks on available() and reset()") { + val remoteBmId = BlockManagerId("test-client-1", "test-client-1", 2) + val blocks = Map[BlockId, ManagedBuffer]( + ShuffleBlockId(0, 0, 0) -> createMockManagedBuffer() + ) + + // Semaphore to coordinate event sequence in two different threads. + val sem = new Semaphore(0) + + answerFetchBlocks { invocation => + val listener = invocation.getArgument[BlockFetchingListener](4) + Future { + listener.onBlockFetchSuccess( + ShuffleBlockId(0, 0, 0).toString, createMockManagedBuffer()) + sem.release() + } + } + + val iterator = createShuffleBlockIteratorWithDefaults( + Map(remoteBmId -> toBlockList(blocks.keys, 1L, 0)), + streamWrapperLimitSize = Some(100), + detectCorruptUseExtraMemory = false, // Don't use `ChunkedByteBufferInputStream`. + corruptAtAvailableReset = true, + checksumEnabled = false + ) + + sem.acquire() + + val (id1, stream) = iterator.next() + assert(id1 === ShuffleBlockId(0, 0, 0)) + + val err1 = intercept[FetchFailedException] { + stream.available() + } + + assert(err1.getMessage.contains("corrupt at available")) + + val err2 = intercept[FetchFailedException] { + stream.reset() + } + + assert(err2.getMessage.contains("corrupt at reset")) + } } From 212f00462f2419039742a305cdc941a886e3a15a Mon Sep 17 00:00:00 2001 From: Emil Ejbyfeldt Date: Tue, 31 Oct 2023 11:19:32 +0800 Subject: [PATCH 086/521] [SPARK-45592][SQL] Correctness issue in AQE with InMemoryTableScanExec Fixes correctness issue in 3.5.0. The problem seems to be that when AQEShuffleRead does a coalesced read it can return a HashPartitioning with the coalesced number of partitions. This causes a correctness bug as the partitioning is not compatible for joins with other HashPartitioning even though the number of partitions matches. This is resolved in this patch by introducing CoalescedHashPartitioning and making AQEShuffleRead return that instead. The fix was suggested by cloud-fan > AQEShuffleRead should probably return a different partitioning, e.g. CoalescedHashPartitioning. It still satisfies ClusterDistribution, so Aggregate is fine and there will be no shuffle. For joins, two CoalescedHashPartitionings are compatible if they have the same original partition number and coalesce boundaries, and CoalescedHashPartitioning is not compatible with HashPartitioning. Correctness bug. Yes, fixed correctness issue. New and existing unit test. No Closes #43435 from eejbyfeldt/SPARK-45592. 
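For illustration, a simplified, self-contained model of the compatibility rule quoted above. The names are hypothetical, not the new `CoalescedHashPartitioning`/`CoalescedHashShuffleSpec` classes; boundaries are `(startReducerIndex, endReducerIndex)` ranges of the coalesced groups.

```
case class Boundary(startReducerIndex: Int, endReducerIndex: Int)

// Two coalesced hash distributions are co-partitioned only if they coalesced the
// same original hash partitioning in exactly the same way; a matching partition
// count alone (the pre-fix assumption) is not enough.
case class CoalescedHashSpec(originalNumPartitions: Int, boundaries: Seq[Boundary]) {
  def numPartitions: Int = boundaries.length

  def isCompatibleWith(other: CoalescedHashSpec): Boolean =
    originalNumPartitions == other.originalNumPartitions &&
      boundaries == other.boundaries
}

object CoalescedCompatibilityExample extends App {
  val left  = CoalescedHashSpec(10, Seq(Boundary(0, 5), Boundary(5, 10)))
  val right = CoalescedHashSpec(10, Seq(Boundary(0, 4), Boundary(4, 10)))
  // Same number of output partitions, yet equal keys can land in different
  // partitions, so a join between them must not skip the shuffle.
  assert(left.numPartitions == right.numPartitions)
  assert(!left.isCompatibleWith(right))
}
```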
Authored-by: Emil Ejbyfeldt Signed-off-by: Wenchen Fan (cherry picked from commit 2be03d81cea34ab08c44426837260c22c67e092e) Signed-off-by: Wenchen Fan --- .../plans/physical/partitioning.scala | 49 +++ .../sql/catalyst/DistributionSuite.scala | 124 +++--- .../spark/sql/catalyst/ShuffleSpecSuite.scala | 401 ++++++++++-------- .../adaptive/AQEShuffleReadExec.scala | 11 +- .../org/apache/spark/sql/DatasetSuite.scala | 14 + .../WriteDistributionAndOrderingSuite.scala | 53 ++- 6 files changed, 386 insertions(+), 266 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index d2f9e9b5d5bf5..1eefe65859bdd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -306,6 +306,35 @@ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): HashPartitioning = copy(expressions = newChildren) + +} + +case class CoalescedBoundary(startReducerIndex: Int, endReducerIndex: Int) + +/** + * Represents a partitioning where partitions have been coalesced from a HashPartitioning into a + * fewer number of partitions. + */ +case class CoalescedHashPartitioning(from: HashPartitioning, partitions: Seq[CoalescedBoundary]) + extends Expression with Partitioning with Unevaluable { + + override def children: Seq[Expression] = from.expressions + override def nullable: Boolean = from.nullable + override def dataType: DataType = from.dataType + + override def satisfies0(required: Distribution): Boolean = from.satisfies0(required) + + override def createShuffleSpec(distribution: ClusteredDistribution): ShuffleSpec = + CoalescedHashShuffleSpec(from.createShuffleSpec(distribution), partitions) + + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): CoalescedHashPartitioning = + copy(from = from.copy(expressions = newChildren)) + + override val numPartitions: Int = partitions.length + + override def toString: String = from.toString + override def sql: String = from.sql } /** @@ -661,6 +690,26 @@ case class HashShuffleSpec( override def numPartitions: Int = partitioning.numPartitions } +case class CoalescedHashShuffleSpec( + from: ShuffleSpec, + partitions: Seq[CoalescedBoundary]) extends ShuffleSpec { + + override def isCompatibleWith(other: ShuffleSpec): Boolean = other match { + case SinglePartitionShuffleSpec => + numPartitions == 1 + case CoalescedHashShuffleSpec(otherParent, otherPartitions) => + partitions == otherPartitions && from.isCompatibleWith(otherParent) + case ShuffleSpecCollection(specs) => + specs.exists(isCompatibleWith) + case _ => + false + } + + override def canCreatePartitioning: Boolean = false + + override def numPartitions: Int = partitions.length +} + case class KeyGroupedShuffleSpec( partitioning: KeyGroupedPartitioning, distribution: ClusteredDistribution) extends ShuffleSpec { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala index a924a9ed02e5d..7cb4d5f123253 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala +++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/DistributionSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst import org.apache.spark.SparkFunSuite /* Implicit conversions */ import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions.{Literal, Murmur3Hash, Pmod} +import org.apache.spark.sql.catalyst.expressions.{Expression, Literal, Murmur3Hash, Pmod} import org.apache.spark.sql.catalyst.plans.physical._ import org.apache.spark.sql.types.IntegerType @@ -146,63 +146,75 @@ class DistributionSuite extends SparkFunSuite { false) } - test("HashPartitioning is the output partitioning") { - // HashPartitioning can satisfy ClusteredDistribution iff its hash expressions are a subset of - // the required clustering expressions. - checkSatisfied( - HashPartitioning(Seq($"a", $"b", $"c"), 10), - ClusteredDistribution(Seq($"a", $"b", $"c")), - true) - - checkSatisfied( - HashPartitioning(Seq($"b", $"c"), 10), - ClusteredDistribution(Seq($"a", $"b", $"c")), - true) - - checkSatisfied( - HashPartitioning(Seq($"a", $"b", $"c"), 10), - ClusteredDistribution(Seq($"b", $"c")), - false) - - checkSatisfied( - HashPartitioning(Seq($"a", $"b", $"c"), 10), - ClusteredDistribution(Seq($"d", $"e")), - false) - - // When ClusteredDistribution.requireAllClusterKeys is set to true, - // HashPartitioning can only satisfy ClusteredDistribution iff its hash expressions are - // exactly same as the required clustering expressions. - checkSatisfied( - HashPartitioning(Seq($"a", $"b", $"c"), 10), - ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), - true) - - checkSatisfied( - HashPartitioning(Seq($"b", $"c"), 10), - ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), - false) - - checkSatisfied( - HashPartitioning(Seq($"b", $"a", $"c"), 10), - ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), - false) - - // HashPartitioning cannot satisfy OrderedDistribution - checkSatisfied( - HashPartitioning(Seq($"a", $"b", $"c"), 10), - OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)), - false) + private def testHashPartitioningLike( + partitioningName: String, + create: (Seq[Expression], Int) => Partitioning): Unit = { + + test(s"$partitioningName is the output partitioning") { + // HashPartitioning can satisfy ClusteredDistribution iff its hash expressions are a subset of + // the required clustering expressions. + checkSatisfied( + create(Seq($"a", $"b", $"c"), 10), + ClusteredDistribution(Seq($"a", $"b", $"c")), + true) + + checkSatisfied( + create(Seq($"b", $"c"), 10), + ClusteredDistribution(Seq($"a", $"b", $"c")), + true) + + checkSatisfied( + create(Seq($"a", $"b", $"c"), 10), + ClusteredDistribution(Seq($"b", $"c")), + false) + + checkSatisfied( + create(Seq($"a", $"b", $"c"), 10), + ClusteredDistribution(Seq($"d", $"e")), + false) + + // When ClusteredDistribution.requireAllClusterKeys is set to true, + // HashPartitioning can only satisfy ClusteredDistribution iff its hash expressions are + // exactly same as the required clustering expressions. 
+ checkSatisfied( + create(Seq($"a", $"b", $"c"), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), + true) + + checkSatisfied( + create(Seq($"b", $"c"), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), + false) + + checkSatisfied( + create(Seq($"b", $"a", $"c"), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"), requireAllClusterKeys = true), + false) + + // HashPartitioning cannot satisfy OrderedDistribution + checkSatisfied( + create(Seq($"a", $"b", $"c"), 10), + OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)), + false) + + checkSatisfied( + create(Seq($"a", $"b", $"c"), 1), + OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)), + false) // TODO: this can be relaxed. + + checkSatisfied( + create(Seq($"b", $"c"), 10), + OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)), + false) + } + } - checkSatisfied( - HashPartitioning(Seq($"a", $"b", $"c"), 1), - OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)), - false) // TODO: this can be relaxed. + testHashPartitioningLike("HashPartitioning", + (expressions, numPartitions) => HashPartitioning(expressions, numPartitions)) - checkSatisfied( - HashPartitioning(Seq($"b", $"c"), 10), - OrderedDistribution(Seq($"a".asc, $"b".asc, $"c".asc)), - false) - } + testHashPartitioningLike("CoalescedHashPartitioning", (expressions, numPartitions) => + CoalescedHashPartitioning( + HashPartitioning(expressions, numPartitions), Seq(CoalescedBoundary(0, numPartitions)))) test("RangePartitioning is the output partitioning") { // RangePartitioning can satisfy OrderedDistribution iff its ordering is a prefix diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala index 51e7688732265..6b069d1c97363 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ShuffleSpecSuite.scala @@ -62,211 +62,254 @@ class ShuffleSpecSuite extends SparkFunSuite with SQLHelper { } } - test("compatibility: HashShuffleSpec on both sides") { - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - expected = true - ) - - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a"), 10), ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a"), 10), ClusteredDistribution(Seq($"a", $"b"))), - expected = true - ) + private def testHashShuffleSpecLike( + shuffleSpecName: String, + create: (HashPartitioning, ClusteredDistribution) => ShuffleSpec): Unit = { - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"b"), 10), ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"d"), 10), ClusteredDistribution(Seq($"c", $"d"))), - expected = true - ) + test(s"compatibility: $shuffleSpecName on both sides") { + checkCompatible( + create(HashPartitioning(Seq($"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + expected = true + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"c", $"c", $"d"), 10), - ClusteredDistribution(Seq($"c", $"d"))), - expected = true - 
) + checkCompatible( + create(HashPartitioning(Seq($"a"), 10), ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"a"), 10), ClusteredDistribution(Seq($"a", $"b"))), + expected = true + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a", $"d"), 10), - ClusteredDistribution(Seq($"a", $"c", $"d"))), - expected = true - ) + checkCompatible( + create(HashPartitioning(Seq($"b"), 10), ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"d"), 10), ClusteredDistribution(Seq($"c", $"d"))), + expected = true + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b", $"a"), 10), - ClusteredDistribution(Seq($"a", $"b", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a", $"c", $"a"), 10), - ClusteredDistribution(Seq($"a", $"c", $"c"))), - expected = true - ) + checkCompatible( + create(HashPartitioning(Seq($"a", $"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"c", $"c", $"d"), 10), + ClusteredDistribution(Seq($"c", $"d"))), + expected = true + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b", $"a"), 10), - ClusteredDistribution(Seq($"a", $"b", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a", $"c", $"a"), 10), - ClusteredDistribution(Seq($"a", $"c", $"d"))), - expected = true - ) + checkCompatible( + create(HashPartitioning(Seq($"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b", $"b"))), + create(HashPartitioning(Seq($"a", $"d"), 10), + ClusteredDistribution(Seq($"a", $"c", $"d"))), + expected = true + ) - // negative cases - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"c"), 5), - ClusteredDistribution(Seq($"c", $"d"))), - expected = false - ) + checkCompatible( + create(HashPartitioning(Seq($"a", $"b", $"a"), 10), + ClusteredDistribution(Seq($"a", $"b", $"b"))), + create(HashPartitioning(Seq($"a", $"c", $"a"), 10), + ClusteredDistribution(Seq($"a", $"c", $"c"))), + expected = true + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - expected = false - ) + checkCompatible( + create(HashPartitioning(Seq($"a", $"b", $"a"), 10), + ClusteredDistribution(Seq($"a", $"b", $"b"))), + create(HashPartitioning(Seq($"a", $"c", $"a"), 10), + ClusteredDistribution(Seq($"a", $"c", $"d"))), + expected = true + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - expected = false - ) + // negative cases + checkCompatible( + create(HashPartitioning(Seq($"a"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"c"), 5), + ClusteredDistribution(Seq($"c", $"d"))), + expected = false + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"d"), 10), - ClusteredDistribution(Seq($"c", $"d"))), - expected = false - ) + checkCompatible( + create(HashPartitioning(Seq($"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + expected = false + 
) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"d"), 10), - ClusteredDistribution(Seq($"c", $"d"))), - expected = false - ) + checkCompatible( + create(HashPartitioning(Seq($"a"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + expected = false + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a", $"b", $"a"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - expected = false - ) + checkCompatible( + create(HashPartitioning(Seq($"a"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"d"), 10), + ClusteredDistribution(Seq($"c", $"d"))), + expected = false + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a", $"b", $"a"), 10), - ClusteredDistribution(Seq($"a", $"b", $"b"))), - expected = false - ) - } + checkCompatible( + create(HashPartitioning(Seq($"a"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"d"), 10), + ClusteredDistribution(Seq($"c", $"d"))), + expected = false + ) - test("compatibility: Only one side is HashShuffleSpec") { - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - SinglePartitionShuffleSpec, - expected = false - ) + checkCompatible( + create(HashPartitioning(Seq($"a", $"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"a", $"b", $"a"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + expected = false + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 1), - ClusteredDistribution(Seq($"a", $"b"))), - SinglePartitionShuffleSpec, - expected = true - ) + checkCompatible( + create(HashPartitioning(Seq($"a", $"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b", $"b"))), + create(HashPartitioning(Seq($"a", $"b", $"a"), 10), + ClusteredDistribution(Seq($"a", $"b", $"b"))), + expected = false + ) + } - checkCompatible( - SinglePartitionShuffleSpec, - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 1), - ClusteredDistribution(Seq($"a", $"b"))), - expected = true - ) + test(s"compatibility: Only one side is $shuffleSpecName") { + checkCompatible( + create(HashPartitioning(Seq($"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + SinglePartitionShuffleSpec, + expected = false + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - RangeShuffleSpec(10, ClusteredDistribution(Seq($"a", $"b"))), - expected = false - ) + checkCompatible( + create(HashPartitioning(Seq($"a", $"b"), 1), + ClusteredDistribution(Seq($"a", $"b"))), + SinglePartitionShuffleSpec, + expected = true + ) - checkCompatible( - RangeShuffleSpec(10, ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - expected = false - ) + checkCompatible( + SinglePartitionShuffleSpec, + create(HashPartitioning(Seq($"a", $"b"), 1), + ClusteredDistribution(Seq($"a", $"b"))), + expected = true + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - 
ShuffleSpecCollection(Seq( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))))), - expected = true - ) + checkCompatible( + create(HashPartitioning(Seq($"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + RangeShuffleSpec(10, ClusteredDistribution(Seq($"a", $"b"))), + expected = false + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - ShuffleSpecCollection(Seq( - HashShuffleSpec(HashPartitioning(Seq($"a"), 10), + checkCompatible( + RangeShuffleSpec(10, ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"a", $"b"), 10), ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))))), - expected = true - ) + expected = false + ) - checkCompatible( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))), - ShuffleSpecCollection(Seq( - HashShuffleSpec(HashPartitioning(Seq($"a"), 10), + checkCompatible( + create(HashPartitioning(Seq($"a", $"b"), 10), ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a", $"b", $"c"), 10), - ClusteredDistribution(Seq($"a", $"b", $"c"))))), - expected = false - ) + ShuffleSpecCollection(Seq( + create(HashPartitioning(Seq($"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))))), + expected = true + ) - checkCompatible( - ShuffleSpecCollection(Seq( - HashShuffleSpec(HashPartitioning(Seq($"b"), 10), + checkCompatible( + create(HashPartitioning(Seq($"a", $"b"), 10), ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))))), - ShuffleSpecCollection(Seq( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b", $"c"), 10), - ClusteredDistribution(Seq($"a", $"b", $"c"))), - HashShuffleSpec(HashPartitioning(Seq($"d"), 10), - ClusteredDistribution(Seq($"c", $"d"))))), - expected = true - ) + ShuffleSpecCollection(Seq( + create(HashPartitioning(Seq($"a"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))))), + expected = true + ) - checkCompatible( - ShuffleSpecCollection(Seq( - HashShuffleSpec(HashPartitioning(Seq($"b"), 10), + checkCompatible( + create(HashPartitioning(Seq($"a", $"b"), 10), ClusteredDistribution(Seq($"a", $"b"))), - HashShuffleSpec(HashPartitioning(Seq($"a", $"b"), 10), - ClusteredDistribution(Seq($"a", $"b"))))), - ShuffleSpecCollection(Seq( - HashShuffleSpec(HashPartitioning(Seq($"a", $"b", $"c"), 10), - ClusteredDistribution(Seq($"a", $"b", $"c"))), - HashShuffleSpec(HashPartitioning(Seq($"c"), 10), - ClusteredDistribution(Seq($"c", $"d"))))), - expected = false - ) + ShuffleSpecCollection(Seq( + create(HashPartitioning(Seq($"a"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"a", $"b", $"c"), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"))))), + expected = false + ) + + checkCompatible( + ShuffleSpecCollection(Seq( + create(HashPartitioning(Seq($"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))))), + ShuffleSpecCollection(Seq( + create(HashPartitioning(Seq($"a", $"b", $"c"), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"))), + create(HashPartitioning(Seq($"d"), 10), + ClusteredDistribution(Seq($"c", $"d"))))), + expected = true + ) + + checkCompatible( + 
ShuffleSpecCollection(Seq( + create(HashPartitioning(Seq($"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))), + create(HashPartitioning(Seq($"a", $"b"), 10), + ClusteredDistribution(Seq($"a", $"b"))))), + ShuffleSpecCollection(Seq( + create(HashPartitioning(Seq($"a", $"b", $"c"), 10), + ClusteredDistribution(Seq($"a", $"b", $"c"))), + create(HashPartitioning(Seq($"c"), 10), + ClusteredDistribution(Seq($"c", $"d"))))), + expected = false + ) + } + } + + testHashShuffleSpecLike("HashShuffleSpec", + (partitioning, distribution) => HashShuffleSpec(partitioning, distribution)) + testHashShuffleSpecLike("CoalescedHashShuffleSpec", + (partitioning, distribution) => { + val partitions = if (partitioning.numPartitions == 1) { + Seq(CoalescedBoundary(0, 1)) + } else { + Seq(CoalescedBoundary(0, 1), CoalescedBoundary(0, partitioning.numPartitions)) + } + CoalescedHashShuffleSpec(HashShuffleSpec(partitioning, distribution), partitions) + }) + + test("compatibility: CoalescedHashShuffleSpec other specs") { + val hashShuffleSpec = HashShuffleSpec( + HashPartitioning(Seq($"a", $"b"), 10), ClusteredDistribution(Seq($"a", $"b"))) + checkCompatible( + hashShuffleSpec, + CoalescedHashShuffleSpec(hashShuffleSpec, Seq(CoalescedBoundary(0, 10))), + expected = false + ) + + checkCompatible( + CoalescedHashShuffleSpec(hashShuffleSpec, + Seq(CoalescedBoundary(0, 5), CoalescedBoundary(5, 10))), + CoalescedHashShuffleSpec(hashShuffleSpec, + Seq(CoalescedBoundary(0, 5), CoalescedBoundary(5, 10))), + expected = true + ) + + checkCompatible( + CoalescedHashShuffleSpec(hashShuffleSpec, + Seq(CoalescedBoundary(0, 4), CoalescedBoundary(4, 10))), + CoalescedHashShuffleSpec(hashShuffleSpec, + Seq(CoalescedBoundary(0, 5), CoalescedBoundary(5, 10))), + expected = false + ) } test("compatibility: other specs") { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEShuffleReadExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEShuffleReadExec.scala index 46ec91dcc0ab2..6b39ac70a62ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEShuffleReadExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AQEShuffleReadExec.scala @@ -19,10 +19,11 @@ package org.apache.spark.sql.execution.adaptive import scala.collection.mutable.ArrayBuffer +import org.apache.spark.SparkException import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} -import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning, SinglePartition, UnknownPartitioning} +import org.apache.spark.sql.catalyst.plans.physical.{CoalescedBoundary, CoalescedHashPartitioning, HashPartitioning, Partitioning, RangePartitioning, RoundRobinPartitioning, SinglePartition, UnknownPartitioning} import org.apache.spark.sql.catalyst.trees.CurrentOrigin import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.exchange.{ReusedExchangeExec, ShuffleExchangeLike} @@ -75,7 +76,13 @@ case class AQEShuffleReadExec private( // partitions is changed. 
child.outputPartitioning match { case h: HashPartitioning => - CurrentOrigin.withOrigin(h.origin)(h.copy(numPartitions = partitionSpecs.length)) + val partitions = partitionSpecs.map { + case CoalescedPartitionSpec(start, end, _) => CoalescedBoundary(start, end) + // Can not happend due to isCoalescedRead + case unexpected => + throw SparkException.internalError(s"Unexpected ShufflePartitionSpec: $unexpected") + } + CurrentOrigin.withOrigin(h.origin)(CoalescedHashPartitioning(h, partitions)) case r: RangePartitioning => CurrentOrigin.withOrigin(r.origin)(r.copy(numPartitions = partitionSpecs.length)) // This can only happen for `REBALANCE_PARTITIONS_BY_NONE`, which uses diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 6d9c43f866a0c..207c66dc4d43b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -2541,6 +2541,20 @@ class DatasetSuite extends QueryTest val ds = Seq(1, 2).toDS().persist(StorageLevel.NONE) assert(ds.count() == 2) } + + test("SPARK-45592: Coaleasced shuffle read is not compatible with hash partitioning") { + val ee = spark.range(0, 1000000, 1, 5).map(l => (l, l)).toDF() + .persist(org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK) + ee.count() + + val minNbrs1 = ee + .groupBy("_1").agg(min(col("_2")).as("min_number")) + .persist(org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK) + + val join = ee.join(minNbrs1, "_1") + assert(join.count() == 1000000) + } + } class DatasetLargeResultCollectingSuite extends QueryTest diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala index 6cab0e0239dc4..40938eb642478 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/WriteDistributionAndOrderingSuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.{catalyst, AnalysisException, DataFrame, Row} import org.apache.spark.sql.catalyst.expressions.{ApplyFunctionExpression, Cast, Literal} import org.apache.spark.sql.catalyst.expressions.objects.Invoke import org.apache.spark.sql.catalyst.plans.physical -import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, RangePartitioning, UnknownPartitioning} +import org.apache.spark.sql.catalyst.plans.physical.{CoalescedBoundary, CoalescedHashPartitioning, HashPartitioning, RangePartitioning, UnknownPartitioning} import org.apache.spark.sql.connector.catalog.Identifier import org.apache.spark.sql.connector.catalog.functions._ import org.apache.spark.sql.connector.distributions.{Distribution, Distributions} @@ -264,11 +264,8 @@ class WriteDistributionAndOrderingSuite extends DistributionAndOrderingSuiteBase ) ) val writePartitioningExprs = Seq(attr("data"), attr("id")) - val writePartitioning = if (!coalesce) { - clusteredWritePartitioning(writePartitioningExprs, targetNumPartitions) - } else { - clusteredWritePartitioning(writePartitioningExprs, Some(1)) - } + val writePartitioning = clusteredWritePartitioning( + writePartitioningExprs, targetNumPartitions, coalesce) checkWriteRequirements( tableDistribution, @@ -377,11 +374,8 @@ class WriteDistributionAndOrderingSuite extends DistributionAndOrderingSuiteBase ) ) val writePartitioningExprs = Seq(attr("data")) - val 
writePartitioning = if (!coalesce) { - clusteredWritePartitioning(writePartitioningExprs, targetNumPartitions) - } else { - clusteredWritePartitioning(writePartitioningExprs, Some(1)) - } + val writePartitioning = clusteredWritePartitioning( + writePartitioningExprs, targetNumPartitions, coalesce) checkWriteRequirements( tableDistribution, @@ -875,11 +869,8 @@ class WriteDistributionAndOrderingSuite extends DistributionAndOrderingSuiteBase ) ) val writePartitioningExprs = Seq(attr("data")) - val writePartitioning = if (!coalesce) { - clusteredWritePartitioning(writePartitioningExprs, targetNumPartitions) - } else { - clusteredWritePartitioning(writePartitioningExprs, Some(1)) - } + val writePartitioning = clusteredWritePartitioning( + writePartitioningExprs, targetNumPartitions, coalesce) checkWriteRequirements( tableDistribution, @@ -963,11 +954,8 @@ class WriteDistributionAndOrderingSuite extends DistributionAndOrderingSuiteBase ) ) val writePartitioningExprs = Seq(attr("data")) - val writePartitioning = if (!coalesce) { - clusteredWritePartitioning(writePartitioningExprs, targetNumPartitions) - } else { - clusteredWritePartitioning(writePartitioningExprs, Some(1)) - } + val writePartitioning = clusteredWritePartitioning( + writePartitioningExprs, targetNumPartitions, coalesce) checkWriteRequirements( tableDistribution, @@ -1154,11 +1142,8 @@ class WriteDistributionAndOrderingSuite extends DistributionAndOrderingSuiteBase ) val writePartitioningExprs = Seq(truncateExpr) - val writePartitioning = if (!coalesce) { - clusteredWritePartitioning(writePartitioningExprs, targetNumPartitions) - } else { - clusteredWritePartitioning(writePartitioningExprs, Some(1)) - } + val writePartitioning = clusteredWritePartitioning( + writePartitioningExprs, targetNumPartitions, coalesce) checkWriteRequirements( tableDistribution, @@ -1422,6 +1407,9 @@ class WriteDistributionAndOrderingSuite extends DistributionAndOrderingSuiteBase case p: physical.HashPartitioning => val resolvedExprs = p.expressions.map(resolveAttrs(_, plan)) p.copy(expressions = resolvedExprs) + case c: physical.CoalescedHashPartitioning => + val resolvedExprs = c.from.expressions.map(resolveAttrs(_, plan)) + c.copy(from = c.from.copy(expressions = resolvedExprs)) case _: UnknownPartitioning => // don't check partitioning if no particular one is expected actualPartitioning @@ -1480,9 +1468,16 @@ class WriteDistributionAndOrderingSuite extends DistributionAndOrderingSuiteBase private def clusteredWritePartitioning( writePartitioningExprs: Seq[catalyst.expressions.Expression], - targetNumPartitions: Option[Int]): physical.Partitioning = { - HashPartitioning(writePartitioningExprs, - targetNumPartitions.getOrElse(conf.numShufflePartitions)) + targetNumPartitions: Option[Int], + coalesce: Boolean): physical.Partitioning = { + val partitioning = HashPartitioning(writePartitioningExprs, + targetNumPartitions.getOrElse(conf.numShufflePartitions)) + if (coalesce) { + CoalescedHashPartitioning( + partitioning, Seq(CoalescedBoundary(0, partitioning.numPartitions))) + } else { + partitioning + } } private def partitionSizes(dataSkew: Boolean, coalesce: Boolean): Seq[Option[Long]] = { From 2a7e3fec1c8e3867afb9bdecf7a02d6ba7b36f90 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 31 Oct 2023 16:47:30 +0900 Subject: [PATCH 087/521] [SPARK-45735][PYTHON][CONNECT][TESTS] Reenable CatalogTests without Spark Connect ### What changes were proposed in this pull request? 
This PR is a followup of https://github.com/apache/spark/pull/39214 that restores the original Catalog tests in PySpark. That PR mistakenly disabled the tests without Spark Connect: https://github.com/apache/spark/blob/fc6a5cca06cf15c4a952cb56720f627efdba7cce/python/pyspark/sql/tests/test_catalog.py#L489 ### Why are the changes needed? To restore the test coverage. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Reenabled unittests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43595 from HyukjinKwon/SPARK-45735. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit 76d9a70932df97d8ea4cc6e279933dee29a88571) Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/tests/test_catalog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/sql/tests/test_catalog.py b/python/pyspark/sql/tests/test_catalog.py index cafffdc9ae8b5..b72172a402bfc 100644 --- a/python/pyspark/sql/tests/test_catalog.py +++ b/python/pyspark/sql/tests/test_catalog.py @@ -486,7 +486,7 @@ def test_refresh_table(self): self.assertEqual(spark.table("my_tab").count(), 0) -class CatalogTests(ReusedSQLTestCase): +class CatalogTests(CatalogTestsMixin, ReusedSQLTestCase): pass From 64242bf6a6425274b83bc1191230437c2d3fbc71 Mon Sep 17 00:00:00 2001 From: zeruibao Date: Tue, 31 Oct 2023 16:46:40 -0700 Subject: [PATCH 088/521] [SPARK-43380][SQL][FOLLOW-UP] Fix slowdown in Avro read ### What changes were proposed in this pull request? Fix slowdown in Avro read. There is a https://github.com/apache/spark/pull/42503 that causes the performance regression. It seems that `SQLConf.get.getConf(confKey)` is very costly. Move it out of `newWriter` function. ### Why are the changes needed? Need to fix the performance regression of Avro read. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT test ### Was this patch authored or co-authored using generative AI tooling? No Closes #43606 from zeruibao/SPARK-43380-FIX-SLOWDOWN. 
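For illustration, a minimal sketch of the hoisting pattern applied here; the names and the simulated lookup are hypothetical, not the actual `AvroDeserializer` code, which replaces `SQLConf.get.getConf(...)` inside `newWriter` with a `lazy val` (see the diff below).

```
object ConfHoistingSketch {
  // Hypothetical stand-in for a costly, thread-local configuration lookup.
  private def expensiveConfLookup(key: String): Boolean = {
    Thread.sleep(1) // simulate the per-call cost
    true
  }

  class BeforeFix {
    // The lookup runs every time a writer/converter is built, i.e. on the hot path.
    def newWriter(): Boolean = !expensiveConfLookup("legacy.allowIncompatibleSchema")
  }

  class AfterFix {
    // Evaluated at most once per instance; later calls reuse the cached value,
    // and the cost is still only paid if the value is actually needed.
    private lazy val preventReadingIncorrectType: Boolean =
      !expensiveConfLookup("legacy.allowIncompatibleSchema")

    def newWriter(): Boolean = preventReadingIncorrectType
  }
}
```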
Authored-by: zeruibao Signed-off-by: Gengliang Wang (cherry picked from commit 45f73bc69655a236323be1bcb2988341d2aa5203) Signed-off-by: Gengliang Wang --- .../scala/org/apache/spark/sql/avro/AvroDeserializer.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala index fe0bd7392b636..ec34d10a5ffe8 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala @@ -105,6 +105,9 @@ private[sql] class AvroDeserializer( s"Cannot convert Avro type $rootAvroType to SQL type ${rootCatalystType.sql}.", ise) } + private lazy val preventReadingIncorrectType = !SQLConf.get + .getConf(SQLConf.LEGACY_AVRO_ALLOW_INCOMPATIBLE_SCHEMA) + def deserialize(data: Any): Option[Any] = converter(data) /** @@ -122,8 +125,6 @@ private[sql] class AvroDeserializer( s"schema is incompatible (avroType = $avroType, sqlType = ${catalystType.sql})" val realDataType = SchemaConverters.toSqlType(avroType, useStableIdForUnionType).dataType - val confKey = SQLConf.LEGACY_AVRO_ALLOW_INCOMPATIBLE_SCHEMA - val preventReadingIncorrectType = !SQLConf.get.getConf(confKey) (avroType.getType, catalystType) match { case (NULL, NullType) => (updater, ordinal, _) => From 1cf1c6a3a8a8cffc5048c584ca1cdae149843d42 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 1 Nov 2023 09:55:45 +0800 Subject: [PATCH 089/521] [SPARK-45749][CORE][WEBUI] Fix `Spark History Server` to sort `Duration` column properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR aims to fix an UI regression at Apache Spark 3.2.0 caused by SPARK-34123. From Apache Spark **3.2.0** to **3.5.0**, `Spark History Server` cannot sort `Duration` column. After this PR, Spark History Server can sort `Duration` column properly like Apache Spark 3.1.3 and before. ### Why are the changes needed? Before SPARK-34123, Apache Spark had the `title` attribute for sorting. - https://github.com/apache/spark/pull/31191 ``` {{duration}} ``` Without `title`, `title-numeric` doesn't work. ### Does this PR introduce _any_ user-facing change? No. This is a bug fix. ### How was this patch tested? Manual test. Please use `Safari Private Browsing ` or `Chrome Incognito` mode. Screenshot 2023-10-31 at 5 47 34 PM Screenshot 2023-10-31 at 5 47 29 PM ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43613 from dongjoon-hyun/SPARK-45749. 
Authored-by: Dongjoon Hyun Signed-off-by: Kent Yao (cherry picked from commit f72510ca9e04ae88660346de440b231fc8225698) Signed-off-by: Kent Yao --- .../resources/org/apache/spark/ui/static/historypage.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/core/src/main/resources/org/apache/spark/ui/static/historypage.js b/core/src/main/resources/org/apache/spark/ui/static/historypage.js index b334bceb5a039..68dc8ba316dbf 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/historypage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/historypage.js @@ -192,7 +192,12 @@ $(document).ready(function() { }, {name: startedColumnName, data: 'startTime' }, {name: completedColumnName, data: 'endTime' }, - {name: durationColumnName, type: "title-numeric", data: 'duration' }, + { + name: durationColumnName, + type: "title-numeric", + data: 'duration', + render: (id, type, row) => `${row.duration}` + }, {name: 'user', data: 'sparkUser' }, {name: 'lastUpdated', data: 'lastUpdated' }, { From 4ccadd67a87fb76e105ab527d01f27ff9fed95cc Mon Sep 17 00:00:00 2001 From: chenyu <119398199+chenyu-opensource@users.noreply.github.com> Date: Wed, 1 Nov 2023 17:12:45 +0800 Subject: [PATCH 090/521] [SPARK-45751][DOCS] Update the default value for spark.executor.logs.rolling.maxRetainedFile **What changes were proposed in this pull request?** The PR updates the default value of 'spark.executor.logs.rolling.maxRetainedFiles' in configuration.html on the website **Why are the changes needed?** The default value of 'spark.executor.logs.rolling.maxRetainedFiles' is -1, but the website is wrong. **Does this PR introduce any user-facing change?** No **How was this patch tested?** It doesn't need to. **Was this patch authored or co-authored using generative AI tooling?** No Closes #43618 from chenyu-opensource/branch-SPARK-45751. Authored-by: chenyu <119398199+chenyu-opensource@users.noreply.github.com> Signed-off-by: Kent Yao (cherry picked from commit e6b4fa835de3f6d0057bf3809ea369d785967bcd) Signed-off-by: Kent Yao --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index 4b0b9b3e3c260..25080784f7374 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -658,7 +658,7 @@ Apart from these, the following properties are also available, and may be useful spark.executor.logs.rolling.maxRetainedFiles - (none) + -1 Sets the number of latest rolling log files that are going to be retained by the system. Older log files will be deleted. Disabled by default. From cec4e488dc7d6fa64dbf3b7bc004ade87a4e27d7 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Sat, 4 Nov 2023 01:16:32 +0800 Subject: [PATCH 091/521] [SPARK-44843][TESTS] Double streamingTimeout for StateStoreMetricsTest to make RocksDBStateStore related streaming tests reliable ### What changes were proposed in this pull request? This PR increases streamingTimeout and the check interval for StateStoreMetricsTest to make RocksDBStateStore-related streaming tests reliable, hopefully. ### Why are the changes needed? ``` SPARK-35896: metrics in StateOperatorProgress are output correctly (RocksDBStateStore with changelog checkpointing) *** FAILED *** (1 minute) [info] Timed out waiting for stream: The code passed to failAfter did not complete within 60 seconds. 
[info] java.base/java.lang.Thread.getStackTrace(Thread.java:1619) ``` The probability of these tests failing is close to 100%, which seriously affects the UX of making PRs for the contributors. https://github.com/yaooqinn/spark/actions/runs/6744173341/job/18333952141 ### Does this PR introduce _any_ user-facing change? no, test only ### How was this patch tested? this can be verified by `sql - slow test` job in CI ### Was this patch authored or co-authored using generative AI tooling? no Closes #43647 from yaooqinn/SPARK-44843. Authored-by: Kent Yao Signed-off-by: Kent Yao (cherry picked from commit afdce266f0ffeb068d47eca2f2af1bcba66b0e95) Signed-off-by: Kent Yao --- .../apache/spark/sql/streaming/StateStoreMetricsTest.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StateStoreMetricsTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StateStoreMetricsTest.scala index 57ced748cd9f0..07837f5c06473 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StateStoreMetricsTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StateStoreMetricsTest.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.streaming +import org.scalatest.time.SpanSugar._ + import org.apache.spark.sql.execution.streaming.StreamExecution trait StateStoreMetricsTest extends StreamTest { @@ -24,6 +26,8 @@ trait StateStoreMetricsTest extends StreamTest { private var lastCheckedRecentProgressIndex = -1 private var lastQuery: StreamExecution = null + override val streamingTimeout = 120.seconds + override def beforeEach(): Unit = { super.beforeEach() lastCheckedRecentProgressIndex = -1 @@ -106,7 +110,7 @@ trait StateStoreMetricsTest extends StreamTest { AssertOnQuery(s"Check operator progress metrics: operatorName = $operatorName, " + s"numShufflePartitions = $numShufflePartitions, " + s"numStateStoreInstances = $numStateStoreInstances") { q => - eventually(timeout(streamingTimeout)) { + eventually(timeout(streamingTimeout), interval(200.milliseconds)) { val (progressesSinceLastCheck, lastCheckedProgressIndex, numStateOperators) = retrieveProgressesSinceLastCheck(q) assert(operatorIndex < numStateOperators, s"Invalid operator Index: $operatorIndex") From 69a980984d001499b502d32bad2228c0e0f59ba1 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 4 Nov 2023 09:23:58 -0700 Subject: [PATCH 092/521] [SPARK-45791][CONNECT][TESTS] Rename `SparkConnectSessionHodlerSuite.scala` to `SparkConnectSessionHolderSuite.scala` ### What changes were proposed in this pull request? This PR aims to fix a typo `Hodler` in file name. - `SparkConnectSessionHodlerSuite.scala` (from) - `SparkConnectSessionHolderSuite.scala` (to) It's also unmatched with the class name in the file because class name itself is correct. https://github.com/apache/spark/blob/3363c2af3f6a59363135451d251f25e328a4fddf/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectSessionHodlerSuite.scala#L37 ### Why are the changes needed? This is a typo from the original PR. - https://github.com/apache/spark/pull/41580 Since the original PR is shipped as Apache Spark 3.5.0, I created a JIRA instead of a follow-up. We need to backport this patch to `branch-3.5`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43657 from dongjoon-hyun/SPARK-45791. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 6d669fa957463851af463d0ba03d6e6ee76e2cda) Signed-off-by: Dongjoon Hyun --- ...sionHodlerSuite.scala => SparkConnectSessionHolderSuite.scala} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/{SparkConnectSessionHodlerSuite.scala => SparkConnectSessionHolderSuite.scala} (100%) diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectSessionHodlerSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectSessionHolderSuite.scala similarity index 100% rename from connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectSessionHodlerSuite.scala rename to connector/connect/server/src/test/scala/org/apache/spark/sql/connect/service/SparkConnectSessionHolderSuite.scala From d36e3e62c72ae121ebf3404db7c4cc51fe66066b Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Tue, 7 Nov 2023 09:06:00 -0800 Subject: [PATCH 093/521] [SPARK-45786][SQL] Fix inaccurate Decimal multiplication and division results ### What changes were proposed in this pull request? This PR fixes inaccurate Decimal multiplication and division results. ### Why are the changes needed? Decimal multiplication and division results may be inaccurate due to rounding issues. #### Multiplication: ``` scala> sql("select -14120025096157587712113961295153.858047 * -0.4652").show(truncate=false) +----------------------------------------------------+ |(-14120025096157587712113961295153.858047 * -0.4652)| +----------------------------------------------------+ |6568635674732509803675414794505.574764 | +----------------------------------------------------+ ``` The correct answer is `6568635674732509803675414794505.574763` Please note that the last digit is `3` instead of `4` as ``` scala> java.math.BigDecimal("-14120025096157587712113961295153.858047").multiply(java.math.BigDecimal("-0.4652")) val res21: java.math.BigDecimal = 6568635674732509803675414794505.5747634644 ``` Since the factional part `.574763` is followed by `4644`, it should not be rounded up. #### Division: ``` scala> sql("select -0.172787979 / 533704665545018957788294905796.5").show(truncate=false) +-------------------------------------------------+ |(-0.172787979 / 533704665545018957788294905796.5)| +-------------------------------------------------+ |-3.237521E-31 | +-------------------------------------------------+ ``` The correct answer is `-3.237520E-31` Please note that the last digit is `0` instead of `1` as ``` scala> java.math.BigDecimal("-0.172787979").divide(java.math.BigDecimal("533704665545018957788294905796.5"), 100, java.math.RoundingMode.DOWN) val res22: java.math.BigDecimal = -3.237520489418037889998826491401059986665344697406144511563561222578738E-31 ``` Since the factional part `.237520` is followed by `4894...`, it should not be rounded up. ### Does this PR introduce _any_ user-facing change? Yes, users will see correct Decimal multiplication and division results. Directly multiplying and dividing with `org.apache.spark.sql.types.Decimal()` (not via SQL) will return 39 digit at maximum instead of 38 at maximum and round down instead of round half-up ### How was this patch tested? Test added ### Was this patch authored or co-authored using generative AI tooling? No Closes #43678 from kazuyukitanimura/SPARK-45786. 
Authored-by: Kazuyuki Tanimura Signed-off-by: Dongjoon Hyun (cherry picked from commit 5ef3a846f52ab90cb7183953cff3080449d0b57b) Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/sql/types/Decimal.scala | 8 +- .../ArithmeticExpressionSuite.scala | 107 ++++++++++++++++++ .../ansi/decimalArithmeticOperations.sql.out | 14 +-- 3 files changed, 120 insertions(+), 9 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/api/src/main/scala/org/apache/spark/sql/types/Decimal.scala index afe73635a6824..77e9aa06c830c 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -499,7 +499,7 @@ final class Decimal extends Ordered[Decimal] with Serializable { def / (that: Decimal): Decimal = if (that.isZero) null else Decimal(toJavaBigDecimal.divide(that.toJavaBigDecimal, - DecimalType.MAX_SCALE, MATH_CONTEXT.getRoundingMode)) + DecimalType.MAX_SCALE + 1, MATH_CONTEXT.getRoundingMode)) def % (that: Decimal): Decimal = if (that.isZero) null @@ -547,7 +547,11 @@ object Decimal { val POW_10 = Array.tabulate[Long](MAX_LONG_DIGITS + 1)(i => math.pow(10, i).toLong) - private val MATH_CONTEXT = new MathContext(DecimalType.MAX_PRECISION, RoundingMode.HALF_UP) + // SPARK-45786 Using RoundingMode.HALF_UP with MathContext may cause inaccurate SQL results + // because TypeCoercion later rounds again. Instead, always round down and use 1 digit longer + // precision than DecimalType.MAX_PRECISION. Then, TypeCoercion will properly round up/down + // the last extra digit. + private val MATH_CONTEXT = new MathContext(DecimalType.MAX_PRECISION + 1, RoundingMode.DOWN) private[sql] val ZERO = Decimal(0) private[sql] val ONE = Decimal(1) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index e21793ab506c4..568dcd10d1166 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.expressions +import java.math.RoundingMode import java.sql.{Date, Timestamp} import java.time.{Duration, Period} import java.time.temporal.ChronoUnit @@ -225,6 +226,112 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper } } + test("SPARK-45786: Decimal multiply, divide, remainder, quot") { + // Some known cases + checkEvaluation( + Multiply( + Literal(Decimal(BigDecimal("-14120025096157587712113961295153.858047"), 38, 6)), + Literal(Decimal(BigDecimal("-0.4652"), 4, 4)) + ), + Decimal(BigDecimal("6568635674732509803675414794505.574763")) + ) + checkEvaluation( + Multiply( + Literal(Decimal(BigDecimal("-240810500742726"), 15, 0)), + Literal(Decimal(BigDecimal("-5677.6988688550027099967697071"), 29, 25)) + ), + Decimal(BigDecimal("1367249507675382200.164877854336665327")) + ) + checkEvaluation( + Divide( + Literal(Decimal(BigDecimal("-0.172787979"), 9, 9)), + Literal(Decimal(BigDecimal("533704665545018957788294905796.5"), 31, 1)) + ), + Decimal(BigDecimal("-3.237520E-31")) + ) + checkEvaluation( + Divide( + Literal(Decimal(BigDecimal("-0.574302343618"), 12, 12)), + Literal(Decimal(BigDecimal("-795826820326278835912868.106"), 27, 3)) + ), + Decimal(BigDecimal("7.21642358550E-25")) + ) + + // Random tests + 
val rand = scala.util.Random + def makeNum(p: Int, s: Int): String = { + val int1 = rand.nextLong() + val int2 = rand.nextLong().abs + val frac1 = rand.nextLong().abs + val frac2 = rand.nextLong().abs + s"$int1$int2".take(p - s + (int1 >>> 63).toInt) + "." + s"$frac1$frac2".take(s) + } + + (0 until 100).foreach { _ => + val p1 = rand.nextInt(38) + 1 // 1 <= p1 <= 38 + val s1 = rand.nextInt(p1 + 1) // 0 <= s1 <= p1 + val p2 = rand.nextInt(38) + 1 + val s2 = rand.nextInt(p2 + 1) + + val n1 = makeNum(p1, s1) + val n2 = makeNum(p2, s2) + + val mulActual = Multiply( + Literal(Decimal(BigDecimal(n1), p1, s1)), + Literal(Decimal(BigDecimal(n2), p2, s2)) + ) + val mulExact = new java.math.BigDecimal(n1).multiply(new java.math.BigDecimal(n2)) + + val divActual = Divide( + Literal(Decimal(BigDecimal(n1), p1, s1)), + Literal(Decimal(BigDecimal(n2), p2, s2)) + ) + val divExact = new java.math.BigDecimal(n1) + .divide(new java.math.BigDecimal(n2), 100, RoundingMode.DOWN) + + val remActual = Remainder( + Literal(Decimal(BigDecimal(n1), p1, s1)), + Literal(Decimal(BigDecimal(n2), p2, s2)) + ) + val remExact = new java.math.BigDecimal(n1).remainder(new java.math.BigDecimal(n2)) + + val quotActual = IntegralDivide( + Literal(Decimal(BigDecimal(n1), p1, s1)), + Literal(Decimal(BigDecimal(n2), p2, s2)) + ) + val quotExact = + new java.math.BigDecimal(n1).divideToIntegralValue(new java.math.BigDecimal(n2)) + + Seq(true, false).foreach { allowPrecLoss => + withSQLConf(SQLConf.DECIMAL_OPERATIONS_ALLOW_PREC_LOSS.key -> allowPrecLoss.toString) { + val mulType = Multiply(null, null).resultDecimalType(p1, s1, p2, s2) + val mulResult = Decimal(mulExact.setScale(mulType.scale, RoundingMode.HALF_UP)) + val mulExpected = + if (mulResult.precision > DecimalType.MAX_PRECISION) null else mulResult + checkEvaluation(mulActual, mulExpected) + + val divType = Divide(null, null).resultDecimalType(p1, s1, p2, s2) + val divResult = Decimal(divExact.setScale(divType.scale, RoundingMode.HALF_UP)) + val divExpected = + if (divResult.precision > DecimalType.MAX_PRECISION) null else divResult + checkEvaluation(divActual, divExpected) + + val remType = Remainder(null, null).resultDecimalType(p1, s1, p2, s2) + val remResult = Decimal(remExact.setScale(remType.scale, RoundingMode.HALF_UP)) + val remExpected = + if (remResult.precision > DecimalType.MAX_PRECISION) null else remResult + checkEvaluation(remActual, remExpected) + + val quotType = IntegralDivide(null, null).resultDecimalType(p1, s1, p2, s2) + val quotResult = Decimal(quotExact.setScale(quotType.scale, RoundingMode.HALF_UP)) + val quotExpected = + if (quotResult.precision > DecimalType.MAX_PRECISION) null else quotResult + checkEvaluation(quotActual, quotExpected.toLong) + } + } + } + } + private def testDecimalAndDoubleType(testFunc: (Int => Any) => Unit): Unit = { testFunc(_.toDouble) testFunc(Decimal(_)) diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out index 699c916fd8fdb..9593291fae21d 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out @@ -155,7 +155,7 @@ org.apache.spark.SparkArithmeticException "config" : "\"spark.sql.ansi.enabled\"", "precision" : "38", "scale" : "6", - "value" : "1000000000000000000000000000000000000.00000000000000000000000000000000000000" + "value" : 
"1000000000000000000000000000000000000.000000000000000000000000000000000000000" }, "queryContext" : [ { "objectType" : "", @@ -204,7 +204,7 @@ org.apache.spark.SparkArithmeticException "config" : "\"spark.sql.ansi.enabled\"", "precision" : "38", "scale" : "6", - "value" : "10123456789012345678901234567890123456.00000000000000000000000000000000000000" + "value" : "10123456789012345678901234567890123456.000000000000000000000000000000000000000" }, "queryContext" : [ { "objectType" : "", @@ -229,7 +229,7 @@ org.apache.spark.SparkArithmeticException "config" : "\"spark.sql.ansi.enabled\"", "precision" : "38", "scale" : "6", - "value" : "101234567890123456789012345678901234.56000000000000000000000000000000000000" + "value" : "101234567890123456789012345678901234.560000000000000000000000000000000000000" }, "queryContext" : [ { "objectType" : "", @@ -254,7 +254,7 @@ org.apache.spark.SparkArithmeticException "config" : "\"spark.sql.ansi.enabled\"", "precision" : "38", "scale" : "6", - "value" : "10123456789012345678901234567890123.45600000000000000000000000000000000000" + "value" : "10123456789012345678901234567890123.456000000000000000000000000000000000000" }, "queryContext" : [ { "objectType" : "", @@ -279,7 +279,7 @@ org.apache.spark.SparkArithmeticException "config" : "\"spark.sql.ansi.enabled\"", "precision" : "38", "scale" : "6", - "value" : "1012345678901234567890123456789012.34560000000000000000000000000000000000" + "value" : "1012345678901234567890123456789012.345600000000000000000000000000000000000" }, "queryContext" : [ { "objectType" : "", @@ -304,7 +304,7 @@ org.apache.spark.SparkArithmeticException "config" : "\"spark.sql.ansi.enabled\"", "precision" : "38", "scale" : "6", - "value" : "101234567890123456789012345678901.23456000000000000000000000000000000000" + "value" : "101234567890123456789012345678901.234560000000000000000000000000000000000" }, "queryContext" : [ { "objectType" : "", @@ -337,7 +337,7 @@ org.apache.spark.SparkArithmeticException "config" : "\"spark.sql.ansi.enabled\"", "precision" : "38", "scale" : "6", - "value" : "101234567890123456789012345678901.23456000000000000000000000000000000000" + "value" : "101234567890123456789012345678901.234560000000000000000000000000000000000" }, "queryContext" : [ { "objectType" : "", From eac87e3985fa614c790bef99cbf9b1d9f3a1f513 Mon Sep 17 00:00:00 2001 From: chenyu <119398199+chenyu-opensource@users.noreply.github.com> Date: Wed, 8 Nov 2023 19:16:48 +0800 Subject: [PATCH 094/521] [SPARK-45829][DOCS] Update the default value for spark.executor.logs.rolling.maxSize **What changes were proposed in this pull request?** The PR updates the default value of 'spark.executor.logs.rolling.maxSize' in configuration.html on the website **Why are the changes needed?** The default value of 'spark.executor.logs.rolling.maxSize' is 1024 * 1024, but the website is wrong. **Does this PR introduce any user-facing change?** No **How was this patch tested?** It doesn't need to. **Was this patch authored or co-authored using generative AI tooling?** No Closes #43712 from chenyu-opensource/branch-SPARK-45829. 
Authored-by: chenyu <119398199+chenyu-opensource@users.noreply.github.com> Signed-off-by: Kent Yao (cherry picked from commit a9127068194a48786df4f429ceb4f908c71f7138) Signed-off-by: Kent Yao --- docs/configuration.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index 25080784f7374..4604360dda287 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -676,7 +676,7 @@ Apart from these, the following properties are also available, and may be useful spark.executor.logs.rolling.maxSize - (none) + 1024 * 1024 Set the max size of the file in bytes by which the executor logs will be rolled over. Rolling is disabled by default. See spark.executor.logs.rolling.maxRetainedFiles From 85fbb3aca54e93a3a66c4eb5743f70486a8383fc Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Wed, 8 Nov 2023 09:18:34 -0800 Subject: [PATCH 095/521] [SPARK-45509][SQL][3.5] Fix df column reference behavior for Spark Connect backport https://github.com/apache/spark/pull/43465 to 3.5 ### What changes were proposed in this pull request? This PR fixes a few problems of column resolution for Spark Connect, to make the behavior closer to classic Spark SQL (unfortunately we still have some behavior differences in corner cases). 1. resolve df column references in both `resolveExpressionByPlanChildren` and `resolveExpressionByPlanOutput`. Previously it's only in `resolveExpressionByPlanChildren`. 2. when the plan id has multiple matches, fail with `AMBIGUOUS_COLUMN_REFERENCE` ### Why are the changes needed? fix behavior differences between spark connect and classic spark sql ### Does this PR introduce _any_ user-facing change? Yes, for spark connect scala client ### How was this patch tested? new tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #43699 from cloud-fan/backport. Authored-by: Wenchen Fan Signed-off-by: Ruifeng Zheng --- .../main/resources/error/error-classes.json | 9 ++ .../apache/spark/sql/ClientE2ETestSuite.scala | 58 +++++++++++ docs/sql-error-conditions.md | 9 ++ python/pyspark/pandas/indexes/multi.py | 2 +- python/pyspark/sql/connect/plan.py | 4 +- .../analysis/ColumnResolutionHelper.scala | 98 +++++++++++-------- 6 files changed, 138 insertions(+), 42 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 9bc65ae32a276..2d50fe1a1a1a8 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -28,6 +28,15 @@ ], "sqlState" : "42702" }, + "AMBIGUOUS_COLUMN_REFERENCE" : { + "message" : [ + "Column is ambiguous. It's because you joined several DataFrame together, and some of these DataFrames are the same.", + "This column points to one of the DataFrame but Spark is unable to figure out which one.", + "Please alias the DataFrames with different names via `DataFrame.alias` before joining them,", + "and specify the column using qualified name, e.g. `df.alias(\"a\").join(df.alias(\"b\"), col(\"a.id\") > col(\"b.id\"))`." + ], + "sqlState" : "42702" + }, "AMBIGUOUS_LATERAL_COLUMN_ALIAS" : { "message" : [ "Lateral column alias is ambiguous and has matches." 
diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala index df36b53791a81..feefd19000d1d 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/ClientE2ETestSuite.scala @@ -767,6 +767,64 @@ class ClientE2ETestSuite extends RemoteSparkSession with SQLHelper with PrivateM assert(joined2.schema.catalogString === "struct") } + test("SPARK-45509: ambiguous column reference") { + val session = spark + import session.implicits._ + val df1 = Seq(1 -> "a").toDF("i", "j") + val df1_filter = df1.filter(df1("i") > 0) + val df2 = Seq(2 -> "b").toDF("i", "y") + + checkSameResult( + Seq(Row(1)), + // df1("i") is not ambiguous, and it's still valid in the filtered df. + df1_filter.select(df1("i"))) + + val e1 = intercept[AnalysisException] { + // df1("i") is not ambiguous, but it's not valid in the projected df. + df1.select((df1("i") + 1).as("plus")).select(df1("i")).collect() + } + assert(e1.getMessage.contains("MISSING_ATTRIBUTES.RESOLVED_ATTRIBUTE_MISSING_FROM_INPUT")) + + checkSameResult( + Seq(Row(1, "a")), + // All these column references are not ambiguous and are still valid after join. + df1.join(df2, df1("i") + 1 === df2("i")).sort(df1("i").desc).select(df1("i"), df1("j"))) + + val e2 = intercept[AnalysisException] { + // df1("i") is ambiguous as df1 appears in both join sides. + df1.join(df1, df1("i") === 1).collect() + } + assert(e2.getMessage.contains("AMBIGUOUS_COLUMN_REFERENCE")) + + val e3 = intercept[AnalysisException] { + // df1("i") is ambiguous as df1 appears in both join sides. + df1.join(df1).select(df1("i")).collect() + } + assert(e3.getMessage.contains("AMBIGUOUS_COLUMN_REFERENCE")) + + val e4 = intercept[AnalysisException] { + // df1("i") is ambiguous as df1 appears in both join sides (df1_filter contains df1). + df1.join(df1_filter, df1("i") === 1).collect() + } + assert(e4.getMessage.contains("AMBIGUOUS_COLUMN_REFERENCE")) + + checkSameResult( + Seq(Row("a")), + // df1_filter("i") is not ambiguous as df1_filter does not exist in the join left side. + df1.join(df1_filter, df1_filter("i") === 1).select(df1_filter("j"))) + + val e5 = intercept[AnalysisException] { + // df1("i") is ambiguous as df1 appears in both sides of the first join. + df1.join(df1_filter, df1_filter("i") === 1).join(df2, df1("i") === 1).collect() + } + assert(e5.getMessage.contains("AMBIGUOUS_COLUMN_REFERENCE")) + + checkSameResult( + Seq(Row("a")), + // df1_filter("i") is not ambiguous as df1_filter only appears once. + df1.join(df1_filter).join(df2, df1_filter("i") === 1).select(df1_filter("j"))) + } + test("broadcast join") { withSQLConf("spark.sql.autoBroadcastJoinThreshold" -> "-1") { val left = spark.range(100).select(col("id"), rand(10).as("a")) diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index 90d21f9758573..0cf05748f58f0 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -55,6 +55,15 @@ See '``/sql-migration-guide.html#query-engine'. Column or field `` is ambiguous and has `` matches. +### AMBIGUOUS_COLUMN_REFERENCE + +[SQLSTATE: 42702](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +Column `` is ambiguous. It's because you joined several DataFrame together, and some of these DataFrames are the same. 
+This column points to one of the DataFrame but Spark is unable to figure out which one. +Please alias the DataFrames with different names via `DataFrame.alias` before joining them, +and specify the column using qualified name, e.g. `df.alias("a").join(df.alias("b"), col("a.id") > col("b.id"))`. + ### AMBIGUOUS_LATERAL_COLUMN_ALIAS [SQLSTATE: 42702](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) diff --git a/python/pyspark/pandas/indexes/multi.py b/python/pyspark/pandas/indexes/multi.py index dd93e31d0235e..74e0b328e4dfb 100644 --- a/python/pyspark/pandas/indexes/multi.py +++ b/python/pyspark/pandas/indexes/multi.py @@ -815,7 +815,7 @@ def symmetric_difference( # type: ignore[override] sdf_symdiff = sdf_self.union(sdf_other).subtract(sdf_self.intersect(sdf_other)) if sort: - sdf_symdiff = sdf_symdiff.sort(*self._internal.index_spark_columns) + sdf_symdiff = sdf_symdiff.sort(*self._internal.index_spark_column_names) internal = InternalFrame( spark_frame=sdf_symdiff, diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py index 9af5823dd8b84..b49274e399c48 100644 --- a/python/pyspark/sql/connect/plan.py +++ b/python/pyspark/sql/connect/plan.py @@ -2123,7 +2123,9 @@ def __init__( self._input_grouping_cols = input_grouping_cols self._other_grouping_cols = other_grouping_cols self._other = cast(LogicalPlan, other) - self._func = function._build_common_inline_user_defined_function(*cols) + # The function takes entire DataFrame as inputs, no need to do + # column binding (no input columns). + self._func = function._build_common_inline_user_defined_function() def plan(self, session: "SparkConnectClient") -> proto.Relation: assert self._child is not None diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala index 98cbdea72d53b..c48006286be9a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ColumnResolutionHelper.scala @@ -29,10 +29,10 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.CurrentOrigin.withOrigin import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.catalyst.util.toPrettySQL -import org.apache.spark.sql.errors.QueryCompilationErrors +import org.apache.spark.sql.errors.{DataTypeErrorsBase, QueryCompilationErrors} import org.apache.spark.sql.internal.SQLConf -trait ColumnResolutionHelper extends Logging { +trait ColumnResolutionHelper extends Logging with DataTypeErrorsBase { def conf: SQLConf @@ -337,7 +337,7 @@ trait ColumnResolutionHelper extends Logging { throws: Boolean = false, allowOuter: Boolean = false): Expression = { resolveExpression( - expr, + tryResolveColumnByPlanId(expr, plan), resolveColumnByName = nameParts => { plan.resolve(nameParts, conf.resolver) }, @@ -358,21 +358,8 @@ trait ColumnResolutionHelper extends Logging { e: Expression, q: LogicalPlan, allowOuter: Boolean = false): Expression = { - val newE = if (e.exists(_.getTagValue(LogicalPlan.PLAN_ID_TAG).nonEmpty)) { - // If the TreeNodeTag 'LogicalPlan.PLAN_ID_TAG' is attached, it means that the plan and - // expression are from Spark Connect, and need to be resolved in this way: - // 1, extract the attached plan id from the expression (UnresolvedAttribute only for now); - // 2, top-down 
traverse the query plan to find the plan node that matches the plan id; - // 3, if can not find the matching node, fail the analysis due to illegal references; - // 4, resolve the expression with the matching node, if any error occurs here, apply the - // old code path; - resolveExpressionByPlanId(e, q) - } else { - e - } - resolveExpression( - newE, + tryResolveColumnByPlanId(e, q), resolveColumnByName = nameParts => { q.resolveChildren(nameParts, conf.resolver) }, @@ -392,39 +379,46 @@ trait ColumnResolutionHelper extends Logging { } } - private def resolveExpressionByPlanId( + // If the TreeNodeTag 'LogicalPlan.PLAN_ID_TAG' is attached, it means that the plan and + // expression are from Spark Connect, and need to be resolved in this way: + // 1. extract the attached plan id from UnresolvedAttribute; + // 2. top-down traverse the query plan to find the plan node that matches the plan id; + // 3. if can not find the matching node, fail the analysis due to illegal references; + // 4. if more than one matching nodes are found, fail due to ambiguous column reference; + // 5. resolve the expression with the matching node, if any error occurs here, return the + // original expression as it is. + private def tryResolveColumnByPlanId( e: Expression, - q: LogicalPlan): Expression = { - if (!e.exists(_.getTagValue(LogicalPlan.PLAN_ID_TAG).nonEmpty)) { - return e - } - - e match { - case u: UnresolvedAttribute => - resolveUnresolvedAttributeByPlanId(u, q).getOrElse(u) - case _ => - e.mapChildren(c => resolveExpressionByPlanId(c, q)) - } + q: LogicalPlan, + idToPlan: mutable.HashMap[Long, LogicalPlan] = mutable.HashMap.empty): Expression = e match { + case u: UnresolvedAttribute => + resolveUnresolvedAttributeByPlanId( + u, q, idToPlan: mutable.HashMap[Long, LogicalPlan] + ).getOrElse(u) + case _ if e.containsPattern(UNRESOLVED_ATTRIBUTE) => + e.mapChildren(c => tryResolveColumnByPlanId(c, q, idToPlan)) + case _ => e } private def resolveUnresolvedAttributeByPlanId( u: UnresolvedAttribute, - q: LogicalPlan): Option[NamedExpression] = { + q: LogicalPlan, + idToPlan: mutable.HashMap[Long, LogicalPlan]): Option[NamedExpression] = { val planIdOpt = u.getTagValue(LogicalPlan.PLAN_ID_TAG) if (planIdOpt.isEmpty) return None val planId = planIdOpt.get logDebug(s"Extract plan_id $planId from $u") - val planOpt = q.find(_.getTagValue(LogicalPlan.PLAN_ID_TAG).contains(planId)) - if (planOpt.isEmpty) { - // For example: - // df1 = spark.createDataFrame([Row(a = 1, b = 2, c = 3)]]) - // df2 = spark.createDataFrame([Row(a = 1, b = 2)]]) - // df1.select(df2.a) <- illegal reference df2.a - throw new AnalysisException(s"When resolving $u, " + - s"fail to find subplan with plan_id=$planId in $q") - } - val plan = planOpt.get + val plan = idToPlan.getOrElseUpdate(planId, { + findPlanById(u, planId, q).getOrElse { + // For example: + // df1 = spark.createDataFrame([Row(a = 1, b = 2, c = 3)]]) + // df2 = spark.createDataFrame([Row(a = 1, b = 2)]]) + // df1.select(df2.a) <- illegal reference df2.a + throw new AnalysisException(s"When resolving $u, " + + s"fail to find subplan with plan_id=$planId in $q") + } + }) try { plan.resolve(u.nameParts, conf.resolver) @@ -434,4 +428,28 @@ trait ColumnResolutionHelper extends Logging { None } } + + private def findPlanById( + u: UnresolvedAttribute, + id: Long, + plan: LogicalPlan): Option[LogicalPlan] = { + if (plan.getTagValue(LogicalPlan.PLAN_ID_TAG).contains(id)) { + Some(plan) + } else if (plan.children.length == 1) { + findPlanById(u, id, plan.children.head) + } else if 
(plan.children.length > 1) { + val matched = plan.children.flatMap(findPlanById(u, id, _)) + if (matched.length > 1) { + throw new AnalysisException( + errorClass = "AMBIGUOUS_COLUMN_REFERENCE", + messageParameters = Map("name" -> toSQLId(u.nameParts)), + origin = u.origin + ) + } else { + matched.headOption + } + } else { + None + } + } } From 35d00618d92e855d7b0bd2551b48309d07f4d180 Mon Sep 17 00:00:00 2001 From: xieshuaihu Date: Thu, 9 Nov 2023 15:56:40 +0800 Subject: [PATCH 096/521] [SPARK-45814][CONNECT][SQL] Make ArrowConverters.createEmptyArrowBatch call close() to avoid memory leak ### What changes were proposed in this pull request? Make `ArrowBatchIterator` implement `AutoCloseable` and `ArrowConverters.createEmptyArrowBatch()` call close() to avoid memory leak. ### Why are the changes needed? `ArrowConverters.createEmptyArrowBatch` don't call `super.hasNext`, if `TaskContext.get` returns `None`, then memory allocated in `ArrowBatchIterator` is leaked. In spark connect, `createEmptyArrowBatch` is called in [SparkConnectPlanner](https://github.com/apache/spark/blob/master/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala#L2558) and [SparkConnectPlanExecution](https://github.com/apache/spark/blob/master/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/execution/SparkConnectPlanExecution.scala#L224), which cause a long running driver consume all off-heap memory specified by `-XX:MaxDirectMemorySize`. This is the exception stack: ``` org.apache.arrow.memory.OutOfMemoryException: Failure allocating buffer. at io.netty.buffer.PooledByteBufAllocatorL.allocate(PooledByteBufAllocatorL.java:67) at org.apache.arrow.memory.NettyAllocationManager.(NettyAllocationManager.java:77) at org.apache.arrow.memory.NettyAllocationManager.(NettyAllocationManager.java:84) at org.apache.arrow.memory.NettyAllocationManager$1.create(NettyAllocationManager.java:34) at org.apache.arrow.memory.BaseAllocator.newAllocationManager(BaseAllocator.java:354) at org.apache.arrow.memory.BaseAllocator.newAllocationManager(BaseAllocator.java:349) at org.apache.arrow.memory.BaseAllocator.bufferWithoutReservation(BaseAllocator.java:337) at org.apache.arrow.memory.BaseAllocator.buffer(BaseAllocator.java:315) at org.apache.arrow.memory.BaseAllocator.buffer(BaseAllocator.java:279) at org.apache.arrow.vector.BaseValueVector.allocFixedDataAndValidityBufs(BaseValueVector.java:192) at org.apache.arrow.vector.BaseFixedWidthVector.allocateBytes(BaseFixedWidthVector.java:338) at org.apache.arrow.vector.BaseFixedWidthVector.allocateNew(BaseFixedWidthVector.java:308) at org.apache.arrow.vector.BaseFixedWidthVector.allocateNew(BaseFixedWidthVector.java:273) at org.apache.spark.sql.execution.arrow.ArrowWriter$.$anonfun$create$1(ArrowWriter.scala:44) at scala.collection.StrictOptimizedIterableOps.map(StrictOptimizedIterableOps.scala:100) at scala.collection.StrictOptimizedIterableOps.map$(StrictOptimizedIterableOps.scala:87) at scala.collection.convert.JavaCollectionWrappers$JListWrapper.map(JavaCollectionWrappers.scala:103) at org.apache.spark.sql.execution.arrow.ArrowWriter$.create(ArrowWriter.scala:43) at org.apache.spark.sql.execution.arrow.ArrowConverters$ArrowBatchIterator.(ArrowConverters.scala:93) at org.apache.spark.sql.execution.arrow.ArrowConverters$ArrowBatchWithSchemaIterator.(ArrowConverters.scala:138) at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.(ArrowConverters.scala:231) at 
org.apache.spark.sql.execution.arrow.ArrowConverters$.createEmptyArrowBatch(ArrowConverters.scala:229) at org.apache.spark.sql.connect.planner.SparkConnectPlanner.handleSqlCommand(SparkConnectPlanner.scala:2481) at org.apache.spark.sql.connect.planner.SparkConnectPlanner.process(SparkConnectPlanner.scala:2426) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.handleCommand(ExecuteThreadRunner.scala:202) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.$anonfun$executeInternal$1(ExecuteThreadRunner.scala:158) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.$anonfun$executeInternal$1$adapted(ExecuteThreadRunner.scala:132) at org.apache.spark.sql.connect.service.SessionHolder.$anonfun$withSession$2(SessionHolder.scala:189) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900) at org.apache.spark.sql.connect.service.SessionHolder.$anonfun$withSession$1(SessionHolder.scala:189) at org.apache.spark.JobArtifactSet$.withActiveJobArtifactState(JobArtifactSet.scala:94) at org.apache.spark.sql.connect.service.SessionHolder.$anonfun$withContextClassLoader$1(SessionHolder.scala:176) at org.apache.spark.util.Utils$.withContextClassLoader(Utils.scala:178) at org.apache.spark.sql.connect.service.SessionHolder.withContextClassLoader(SessionHolder.scala:175) at org.apache.spark.sql.connect.service.SessionHolder.withSession(SessionHolder.scala:188) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.executeInternal(ExecuteThreadRunner.scala:132) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner.org$apache$spark$sql$connect$execution$ExecuteThreadRunner$$execute(ExecuteThreadRunner.scala:84) at org.apache.spark.sql.connect.execution.ExecuteThreadRunner$ExecutionThread.run(ExecuteThreadRunner.scala:228) Caused by: io.netty.util.internal.OutOfDirectMemoryError: failed to allocate 4194304 byte(s) of direct memory (used: 1069547799, max: 1073741824) at io.netty.util.internal.PlatformDependent.incrementMemoryCounter(PlatformDependent.java:845) at io.netty.util.internal.PlatformDependent.allocateDirectNoCleaner(PlatformDependent.java:774) at io.netty.buffer.PoolArena$DirectArena.allocateDirect(PoolArena.java:721) at io.netty.buffer.PoolArena$DirectArena.newChunk(PoolArena.java:696) at io.netty.buffer.PoolArena.allocateNormal(PoolArena.java:215) at io.netty.buffer.PoolArena.tcacheAllocateSmall(PoolArena.java:180) at io.netty.buffer.PoolArena.allocate(PoolArena.java:137) at io.netty.buffer.PoolArena.allocate(PoolArena.java:129) at io.netty.buffer.PooledByteBufAllocatorL$InnerAllocator.newDirectBufferL(PooledByteBufAllocatorL.java:181) at io.netty.buffer.PooledByteBufAllocatorL$InnerAllocator.directBuffer(PooledByteBufAllocatorL.java:214) at io.netty.buffer.PooledByteBufAllocatorL.allocate(PooledByteBufAllocatorL.java:58) ... 37 more ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually test ### Was this patch authored or co-authored using generative AI tooling? No Closes #43691 from xieshuaihu/spark-45814. 
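For readers skimming the patch, the shape of the fix reduces to the following hedged, self-contained sketch (the names below are illustrative stand-ins, not the real `ArrowConverters` internals): an iterator that owns off-heap resources implements `AutoCloseable`, and a caller running without a `TaskContext` closes it itself in a `finally` block, mirroring the `Utils.tryWithSafeFinally` call in the diff below.

```scala
// Illustrative sketch only; the real code lives in ArrowConverters.scala.
class ResourceBackedIterator extends Iterator[Array[Byte]] with AutoCloseable {
  private var closed = false
  override def hasNext: Boolean = !closed
  override def next(): Array[Byte] = Array.emptyByteArray // placeholder payload
  // The real implementation releases the Arrow root and allocator here.
  override def close(): Unit = { closed = true }
}

val batches = new ResourceBackedIterator
val firstBatch =
  try batches.next()
  finally batches.close() // no TaskContext completion listener will do this for us
```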
Authored-by: xieshuaihu Signed-off-by: yangjie01 (cherry picked from commit c128f811820e5a31ddd5bd1c95ed8dd49017eaea) Signed-off-by: yangjie01 --- .../sql/execution/arrow/ArrowConverters.scala | 25 +++++++++++++------ 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala index 86dd7984b5859..a843582e9c2c5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowConverters.scala @@ -80,7 +80,7 @@ private[sql] object ArrowConverters extends Logging { maxRecordsPerBatch: Long, timeZoneId: String, errorOnDuplicatedFieldNames: Boolean, - context: TaskContext) extends Iterator[Array[Byte]] { + context: TaskContext) extends Iterator[Array[Byte]] with AutoCloseable { protected val arrowSchema = ArrowUtils.toArrowSchema(schema, timeZoneId, errorOnDuplicatedFieldNames) @@ -93,13 +93,11 @@ private[sql] object ArrowConverters extends Logging { protected val arrowWriter = ArrowWriter.create(root) Option(context).foreach {_.addTaskCompletionListener[Unit] { _ => - root.close() - allocator.close() + close() }} override def hasNext: Boolean = rowIter.hasNext || { - root.close() - allocator.close() + close() false } @@ -124,6 +122,11 @@ private[sql] object ArrowConverters extends Logging { out.toByteArray } + + override def close(): Unit = { + root.close() + allocator.close() + } } private[sql] class ArrowBatchWithSchemaIterator( @@ -226,11 +229,19 @@ private[sql] object ArrowConverters extends Logging { schema: StructType, timeZoneId: String, errorOnDuplicatedFieldNames: Boolean): Array[Byte] = { - new ArrowBatchWithSchemaIterator( + val batches = new ArrowBatchWithSchemaIterator( Iterator.empty, schema, 0L, 0L, timeZoneId, errorOnDuplicatedFieldNames, TaskContext.get) { override def hasNext: Boolean = true - }.next() + } + Utils.tryWithSafeFinally { + batches.next() + } { + // If taskContext is null, `batches.close()` should be called to avoid memory leak. + if (TaskContext.get() == null) { + batches.close() + } + } } /** From fbc150fbbb702f18ca12c6e6dec3fe01dbe76612 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 9 Nov 2023 16:23:38 +0800 Subject: [PATCH 097/521] [SPARK-45847][SQL][TESTS] CliSuite flakiness due to non-sequential guarantee for stdout&stderr ### What changes were proposed in this pull request? In CliSuite, This PR adds a retry for tests that write errors to STDERR. ### Why are the changes needed? To fix flakiness tests as below https://github.com/chenhao-db/apache-spark/actions/runs/6791437199/job/18463313766 https://github.com/dongjoon-hyun/spark/actions/runs/6753670527/job/18361206900 ```sql [info] Spark master: local, Application Id: local-1699402393189 [info] spark-sql> /* SELECT /*+ HINT() 4; */; [info] [info] [PARSE_SYNTAX_ERROR] Syntax error at or near ';'. SQLSTATE: 42601 (line 1, pos 26) [info] [info] == SQL == [info] /* SELECT /*+ HINT() 4; */; [info] --------------------------^^^ [info] [info] spark-sql> /* SELECT /*+ HINT() 4; */ SELECT 1; [info] 1 [info] Time taken: 1.499 seconds, Fetched 1 row(s) [info] [info] [UNCLOSED_BRACKETED_COMMENT] Found an unclosed bracketed comment. Please, append */ at the end of the comment. 
SQLSTATE: 42601 [info] == SQL == [info] /* Here is a unclosed bracketed comment SELECT 1; [info] spark-sql> /* Here is a unclosed bracketed comment SELECT 1; [info] spark-sql> /* SELECT /*+ HINT() */ 4; */; [info] spark-sql> ``` As you can see the fragment above, the query on the 3rd line from the bottom, came from STDOUT, was printed later than its error output, came from STDERR. In this scenario, the error output would not match anything and would simply go unnoticed. Finally, timed out and failed. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests and CI ### Was this patch authored or co-authored using generative AI tooling? no Closes #43725 from yaooqinn/SPARK-45847. Authored-by: Kent Yao Signed-off-by: Kent Yao (cherry picked from commit 06d8cbe073499ff16bca3165e2de1192daad3984) Signed-off-by: Kent Yao --- .../sql/hive/thriftserver/CliSuite.scala | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 8ba9ea28a5a96..343b32e6227c2 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -383,7 +383,7 @@ class CliSuite extends SparkFunSuite { ) } - test("SPARK-11188 Analysis error reporting") { + testRetry("SPARK-11188 Analysis error reporting") { runCliWithin(timeout = 2.minute, errorResponses = Seq("AnalysisException"))( "select * from nonexistent_table;" -> "nonexistent_table" @@ -551,7 +551,7 @@ class CliSuite extends SparkFunSuite { ) } - test("SparkException with root cause will be printStacktrace") { + testRetry("SparkException with root cause will be printStacktrace") { // If it is not in silent mode, will print the stacktrace runCliWithin( 1.minute, @@ -575,8 +575,8 @@ class CliSuite extends SparkFunSuite { runCliWithin(1.minute)("SELECT MAKE_DATE(-44, 3, 15);" -> "-0044-03-15") } - test("SPARK-33100: Ignore a semicolon inside a bracketed comment in spark-sql") { - runCliWithin(4.minute)( + testRetry("SPARK-33100: Ignore a semicolon inside a bracketed comment in spark-sql") { + runCliWithin(1.minute)( "/* SELECT 'test';*/ SELECT 'test';" -> "test", ";;/* SELECT 'test';*/ SELECT 'test';" -> "test", "/* SELECT 'test';*/;; SELECT 'test';" -> "test", @@ -623,8 +623,8 @@ class CliSuite extends SparkFunSuite { ) } - test("SPARK-37555: spark-sql should pass last unclosed comment to backend") { - runCliWithin(5.minute)( + testRetry("SPARK-37555: spark-sql should pass last unclosed comment to backend") { + runCliWithin(1.minute)( // Only unclosed comment. "/* SELECT /*+ HINT() 4; */;".stripMargin -> "Syntax error at or near ';'", // Unclosed nested bracketed comment. 
@@ -637,7 +637,7 @@ class CliSuite extends SparkFunSuite { ) } - test("SPARK-37694: delete [jar|file|archive] shall use spark sql processor") { + testRetry("SPARK-37694: delete [jar|file|archive] shall use spark sql processor") { runCliWithin(2.minute, errorResponses = Seq("ParseException"))( "delete jar dummy.jar;" -> "Syntax error at or near 'jar': missing 'FROM'.(line 1, pos 7)") } @@ -678,7 +678,7 @@ class CliSuite extends SparkFunSuite { SparkSQLEnv.stop() } - test("SPARK-39068: support in-memory catalog and running concurrently") { + testRetry("SPARK-39068: support in-memory catalog and running concurrently") { val extraConf = Seq("-c", s"${StaticSQLConf.CATALOG_IMPLEMENTATION.key}=in-memory") val cd = new CountDownLatch(2) def t: Thread = new Thread { @@ -698,7 +698,7 @@ class CliSuite extends SparkFunSuite { } // scalastyle:off line.size.limit - test("formats of error messages") { + testRetry("formats of error messages") { def check(format: ErrorMessageFormat.Value, errorMessage: String, silent: Boolean): Unit = { val expected = errorMessage.split(System.lineSeparator()).map("" -> _) runCliWithin( @@ -810,7 +810,6 @@ class CliSuite extends SparkFunSuite { s"spark.sql.catalog.$catalogName.url=jdbc:derby:memory:$catalogName;create=true" val catalogDriver = s"spark.sql.catalog.$catalogName.driver=org.apache.derby.jdbc.AutoloadedDriver" - val database = s"-database $catalogName.SYS" val catalogConfigs = Seq(catalogImpl, catalogDriver, catalogUrl, "spark.sql.catalogImplementation=in-memory") .flatMap(Seq("--conf", _)) From 0b68e1700f60ad1a32f066c10a0f76bea893b7ce Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 10 Nov 2023 21:09:43 +0800 Subject: [PATCH 098/521] [SPARK-45878][SQL][TESTS] Fix ConcurrentModificationException in CliSuite ### What changes were proposed in this pull request? This PR changes the ArrayBuffer for logs to immutable for reading to prevent ConcurrentModificationException which hides the actual cause of failure ### Why are the changes needed? ```scala [info] - SPARK-29022 Commands using SerDe provided in ADD JAR sql *** FAILED *** (11 seconds, 105 milliseconds) [info] java.util.ConcurrentModificationException: mutation occurred during iteration [info] at scala.collection.mutable.MutationTracker$.checkMutations(MutationTracker.scala:43) [info] at scala.collection.mutable.CheckedIndexedSeqView$CheckedIterator.hasNext(CheckedIndexedSeqView.scala:47) [info] at scala.collection.IterableOnceOps.addString(IterableOnce.scala:1247) [info] at scala.collection.IterableOnceOps.addString$(IterableOnce.scala:1241) [info] at scala.collection.AbstractIterable.addString(Iterable.scala:933) [info] at org.apache.spark.sql.hive.thriftserver.CliSuite.runCliWithin(CliSuite.scala:205) [info] at org.apache.spark.sql.hive.thriftserver.CliSuite.$anonfun$new$20(CliSuite.scala:501) ``` ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #43749 from yaooqinn/SPARK-45878. 
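Reduced to its essence, the change makes the reader take the same lock the writer already holds before iterating the mutable buffer. A hedged sketch with made-up names (not the actual CliSuite members):

```scala
import scala.collection.mutable.ArrayBuffer

// Sketch: appends and the failure-report read synchronize on one monitor;
// otherwise mkString can observe a buffer that another thread is still mutating.
val lock = new Object
val capturedLogs = ArrayBuffer.empty[String]

def captureOutput(line: String): Unit = lock.synchronized { capturedLogs += line }

def failureReport(): String = lock.synchronized { capturedLogs.mkString("\n") }
```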
Authored-by: Kent Yao Signed-off-by: Kent Yao (cherry picked from commit b347237735094e9092f4100583ed1d6f3eacf1f6) Signed-off-by: Kent Yao --- .../org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 343b32e6227c2..38dcd1d8b00af 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -193,7 +193,7 @@ class CliSuite extends SparkFunSuite { ThreadUtils.awaitResult(foundAllExpectedAnswers.future, timeoutForQuery) log.info("Found all expected output.") } catch { case cause: Throwable => - val message = + val message = lock.synchronized { s""" |======================= |CliSuite failure output @@ -207,6 +207,7 @@ class CliSuite extends SparkFunSuite { |End CliSuite failure output |=========================== """.stripMargin + } logError(message, cause) fail(message, cause) } finally { From 68b531dd2b485fa2203d6a2bd2de90afc97a13bb Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 10 Nov 2023 07:50:17 -0800 Subject: [PATCH 099/521] [SPARK-45883][BUILD] Upgrade ORC to 1.9.2 ### What changes were proposed in this pull request? This PR aims to upgrade ORC to 1.9.2 for Apache Spark 4.0.0 and 3.5.1. ### Why are the changes needed? To bring the latest bug fixes. - https://github.com/apache/orc/releases/tag/v1.9.2 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43754 from dongjoon-hyun/SPARK-45883. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 917947e62e1e67f49a83c1ffb0833b61f0c48eb6) Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-3-hive-2.3 | 6 +++--- pom.xml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 1d02f8dba567e..9ab51dfa011a2 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -212,9 +212,9 @@ opencsv/2.3//opencsv-2.3.jar opentracing-api/0.33.0//opentracing-api-0.33.0.jar opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar opentracing-util/0.33.0//opentracing-util-0.33.0.jar -orc-core/1.9.1/shaded-protobuf/orc-core-1.9.1-shaded-protobuf.jar -orc-mapreduce/1.9.1/shaded-protobuf/orc-mapreduce-1.9.1-shaded-protobuf.jar -orc-shims/1.9.1//orc-shims-1.9.1.jar +orc-core/1.9.2/shaded-protobuf/orc-core-1.9.2-shaded-protobuf.jar +orc-mapreduce/1.9.2/shaded-protobuf/orc-mapreduce-1.9.2-shaded-protobuf.jar +orc-shims/1.9.2//orc-shims-1.9.2.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/pom.xml b/pom.xml index be8400c33bf2b..14e0ab3e0f620 100644 --- a/pom.xml +++ b/pom.xml @@ -141,7 +141,7 @@ 10.14.2.0 1.13.1 - 1.9.1 + 1.9.2 shaded-protobuf 9.4.52.v20230823 4.0.3 From 19d225bf3f56d392ebb4e7727bd30109b1b75bf5 Mon Sep 17 00:00:00 2001 From: "longfei.jiang" Date: Sat, 11 Nov 2023 13:49:18 +0800 Subject: [PATCH 100/521] [MINOR][DOCS] Fix the example value in the docs ### What changes were proposed in this pull request? fix the example value ### Why are the changes needed? 
for doc ### Does this PR introduce _any_ user-facing change? Yes ### How was this patch tested? Just example value in the docs, no need to test. ### Was this patch authored or co-authored using generative AI tooling? No Closes #43750 from jlfsdtc/fix_typo_in_doc. Authored-by: longfei.jiang Signed-off-by: Kent Yao (cherry picked from commit b501a223bfcf4ddbcb0b2447aa06c549051630b0) Signed-off-by: Kent Yao --- docs/sql-ref-datetime-pattern.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-ref-datetime-pattern.md b/docs/sql-ref-datetime-pattern.md index 5e28a18acefa4..e5d5388f262e4 100644 --- a/docs/sql-ref-datetime-pattern.md +++ b/docs/sql-ref-datetime-pattern.md @@ -41,7 +41,7 @@ Spark uses pattern letters in the following table for date and timestamp parsing |**a**|am-pm-of-day|am-pm|PM| |**h**|clock-hour-of-am-pm (1-12)|number(2)|12| |**K**|hour-of-am-pm (0-11)|number(2)|0| -|**k**|clock-hour-of-day (1-24)|number(2)|0| +|**k**|clock-hour-of-day (1-24)|number(2)|1| |**H**|hour-of-day (0-23)|number(2)|0| |**m**|minute-of-hour|number(2)|30| |**s**|second-of-minute|number(2)|55| From 5c7a55f331b7a41f37b55aff3c5fb29af7916d06 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Sun, 12 Nov 2023 14:34:32 -0800 Subject: [PATCH 101/521] [SPARK-45896][SQL] Construct `ValidateExternalType` with the correct expected type ### What changes were proposed in this pull request? When creating a serializer for a `Map` or `Seq` with an element of type `Option`, pass an expected type of `Option` to `ValidateExternalType` rather than the `Option`'s type argument. ### Why are the changes needed? In 3.4.1, 3.5.0, and master, the following code gets an error: ``` scala> val df = Seq(Seq(Some(Seq(0)))).toDF("a") val df = Seq(Seq(Some(Seq(0)))).toDF("a") org.apache.spark.SparkRuntimeException: [EXPRESSION_ENCODING_FAILED] Failed to encode a value of the expressions: mapobjects(lambdavariable(MapObject, ObjectType(class java.lang.Object), true, -1), mapobjects(lambdavariable(MapObject, ObjectType(class java.lang.Object), true, -2), assertnotnull(validateexternaltype(lambdavariable(MapObject, ObjectType(class java.lang.Object), true, -2), IntegerType, IntegerType)), unwrapoption(ObjectType(interface scala.collection.immutable.Seq), validateexternaltype(lambdavariable(MapObject, ObjectType(class java.lang.Object), true, -1), ArrayType(IntegerType,false), ObjectType(class scala.Option))), None), input[0, scala.collection.immutable.Seq, true], None) AS value#0 to a row. SQLSTATE: 42846 ... Caused by: java.lang.RuntimeException: scala.Some is not a valid external type for schema of array at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificUnsafeProjection.MapObjects_0$(Unknown Source) ... ``` However, this code works in 3.3.3. 
Similarly, this code gets an error: ``` scala> val df = Seq(Seq(Some(java.sql.Timestamp.valueOf("2023-01-01 00:00:00")))).toDF("a") val df = Seq(Seq(Some(java.sql.Timestamp.valueOf("2023-01-01 00:00:00")))).toDF("a") org.apache.spark.SparkRuntimeException: [EXPRESSION_ENCODING_FAILED] Failed to encode a value of the expressions: mapobjects(lambdavariable(MapObject, ObjectType(class java.lang.Object), true, -1), staticinvoke(class org.apache.spark.sql.catalyst.util.DateTimeUtils$, TimestampType, fromJavaTimestamp, unwrapoption(ObjectType(class java.sql.Timestamp), validateexternaltype(lambdavariable(MapObject, ObjectType(class java.lang.Object), true, -1), TimestampType, ObjectType(class scala.Option))), true, false, true), input[0, scala.collection.immutable.Seq, true], None) AS value#0 to a row. SQLSTATE: 42846 ... Caused by: java.lang.RuntimeException: scala.Some is not a valid external type for schema of timestamp ... ``` As with the first example, this code works in 3.3.3. `SerializerBuildHelper#validateAndSerializeElement` will construct `ValidateExternalType` with an expected type of the `Option`'s type parameter. Therefore, for element types `Option[Seq/Date/Timestamp/BigDecimal]`, `ValidateExternalType` will try to validate that the element is of the contained type (e.g., `BigDecimal`) rather than of type `Option`. Since the element type is of type `Option`, the validation fails. Validation currently works by accident for element types `Option[Map/ Signed-off-by: Dongjoon Hyun (cherry picked from commit e440f3245243a31e7bdfe945e1ce7194609b78fb) Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/SerializerBuildHelper.scala | 7 ++++++- .../catalyst/encoders/ExpressionEncoderSuite.scala | 12 ++++++++++++ .../scala/org/apache/spark/sql/DatasetSuite.scala | 9 +++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala index 27090ff6fa5d6..cd087514f4be3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SerializerBuildHelper.scala @@ -450,10 +450,15 @@ object SerializerBuildHelper { private def validateAndSerializeElement( enc: AgnosticEncoder[_], nullable: Boolean): Expression => Expression = { input => + val expected = enc match { + case OptionEncoder(_) => lenientExternalDataTypeFor(enc) + case _ => enc.dataType + } + expressionWithNullSafety( createSerializer( enc, - ValidateExternalType(input, enc.dataType, lenientExternalDataTypeFor(enc))), + ValidateExternalType(input, expected, lenientExternalDataTypeFor(enc))), nullable, WalkedTypePath()) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala index 9d2051b01d62e..724a91806c7e0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala @@ -477,6 +477,18 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes encodeDecodeTest(Option.empty[Int], "empty option of int") encodeDecodeTest(Option("abc"), "option of string") encodeDecodeTest(Option.empty[String], "empty option of string") + 
encodeDecodeTest(Seq(Some(Seq(0))), "SPARK-45896: seq of option of seq") + encodeDecodeTest(Map(0 -> Some(Seq(0))), "SPARK-45896: map of option of seq") + encodeDecodeTest(Seq(Some(Timestamp.valueOf("2023-01-01 00:00:00"))), + "SPARK-45896: seq of option of timestamp") + encodeDecodeTest(Map(0 -> Some(Timestamp.valueOf("2023-01-01 00:00:00"))), + "SPARK-45896: map of option of timestamp") + encodeDecodeTest(Seq(Some(Date.valueOf("2023-01-01"))), + "SPARK-45896: seq of option of date") + encodeDecodeTest(Map(0 -> Some(Date.valueOf("2023-01-01"))), + "SPARK-45896: map of option of date") + encodeDecodeTest(Seq(Some(BigDecimal(200))), "SPARK-45896: seq of option of bigdecimal") + encodeDecodeTest(Map(0 -> Some(BigDecimal(200))), "SPARK-45896: map of option of bigdecimal") encodeDecodeTest(ScroogeLikeExample(1), "SPARK-40385 class with only a companion object constructor") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 207c66dc4d43b..0878ae134e9d4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -270,6 +270,13 @@ class DatasetSuite extends QueryTest (ClassData("one", 2), 1L), (ClassData("two", 3), 1L)) } + test("SPARK-45896: seq of option of seq") { + val ds = Seq(DataSeqOptSeq(Seq(Some(Seq(0))))).toDS() + checkDataset( + ds, + DataSeqOptSeq(Seq(Some(List(0))))) + } + test("select") { val ds = Seq(("a", 1), ("b", 2), ("c", 3)).toDS() checkDataset( @@ -2629,6 +2636,8 @@ case class ClassNullableData(a: String, b: Integer) case class NestedStruct(f: ClassData) case class DeepNestedStruct(f: NestedStruct) +case class DataSeqOptSeq(a: Seq[Option[Seq[Int]]]) + /** * A class used to test serialization using encoders. This class throws exceptions when using * Java serialization -- so the only way it can be "serialized" is through our encoders. From 6313e6cc5090036eacb9c234584705d4b398c39e Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 13 Nov 2023 19:31:22 +0800 Subject: [PATCH 102/521] [SPARK-45906][YARN] Fix error message extraction from ResourceNotFoundException ### What changes were proposed in this pull request? This PR aims to fix the error message extraction from `ResourceNotFoundException`, the current wrong implementation also has a potential NPE issue. ### Why are the changes needed? This bug is introduced in SPARK-43202, previously, `e.getCause()` is used to unwrap `InvocationTargetException`, after replacing reflection invocation with direct API calling, we should not apply `getCause()`. ### Does this PR introduce _any_ user-facing change? Yes, bug fix. ### How was this patch tested? Review. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43782 from pan3793/SPARK-45906. 
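The failure mode is easy to reproduce in isolation (hedged sketch; the exception below stands in for YARN's `ResourceNotFoundException`): once the call is no longer made reflectively there is no `InvocationTargetException` wrapper, so `getCause` is typically `null` and dereferencing it throws an NPE instead of logging the intended warning.

```scala
// Illustrative only: a directly thrown exception has no wrapping cause.
val e = new IllegalArgumentException("YARN doesn't know about resource gpu")

// e.getCause.getMessage   // NullPointerException: getCause is null here
val warning = e.getMessage // safe: "YARN doesn't know about resource gpu"
```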
Authored-by: Cheng Pan Signed-off-by: Kent Yao (cherry picked from commit c29b127dcdd99b0038e96b90177b44b828b32c4b) Signed-off-by: Kent Yao --- .../org/apache/spark/deploy/yarn/ResourceRequestHelper.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala index 0dd4e0a6c8ad9..f9aa11c4d48d6 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ResourceRequestHelper.scala @@ -168,7 +168,7 @@ private object ResourceRequestHelper extends Logging { if (numResourceErrors < 2) { logWarning(s"YARN doesn't know about resource $name, your resource discovery " + s"has to handle properly discovering and isolating the resource! Error: " + - s"${e.getCause.getMessage}") + s"${e.getMessage}") numResourceErrors += 1 } } From d9f0c44e7f24cba95f7bf1737bb52ff73a7b9094 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Tue, 14 Nov 2023 12:09:37 +0800 Subject: [PATCH 103/521] [SPARK-45770][SQL][PYTHON][CONNECT][3.5] Introduce plan DataFrameDropColumns for Dataframe.drop ### What changes were proposed in this pull request? backport https://github.com/apache/spark/pull/43683 to 3.5 ### Why are the changes needed? to fix a connect bug ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? ci ### Was this patch authored or co-authored using generative AI tooling? no Closes #43776 from zhengruifeng/sql_drop_plan_35. Authored-by: Ruifeng Zheng Signed-off-by: Ruifeng Zheng --- python/pyspark/sql/tests/test_dataframe.py | 37 ++++++++++++++ .../sql/catalyst/analysis/Analyzer.scala | 1 + .../ResolveDataFrameDropColumns.scala | 49 +++++++++++++++++++ .../plans/logical/basicLogicalOperators.scala | 14 ++++++ .../sql/catalyst/trees/TreePatterns.scala | 1 + .../scala/org/apache/spark/sql/Dataset.scala | 15 +----- 6 files changed, 104 insertions(+), 13 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDataFrameDropColumns.scala diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index 33049233dee98..5907c8c09fb46 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -106,6 +106,43 @@ def test_drop(self): self.assertEqual(df.drop(col("name"), col("age")).columns, ["active"]) self.assertEqual(df.drop(col("name"), col("age"), col("random")).columns, ["active"]) + def test_drop_join(self): + left_df = self.spark.createDataFrame( + [(1, "a"), (2, "b"), (3, "c")], + ["join_key", "value1"], + ) + right_df = self.spark.createDataFrame( + [(1, "aa"), (2, "bb"), (4, "dd")], + ["join_key", "value2"], + ) + joined_df = left_df.join( + right_df, + on=left_df["join_key"] == right_df["join_key"], + how="left", + ) + + dropped_1 = joined_df.drop(left_df["join_key"]) + self.assertEqual(dropped_1.columns, ["value1", "join_key", "value2"]) + self.assertEqual( + dropped_1.sort("value1").collect(), + [ + Row(value1="a", join_key=1, value2="aa"), + Row(value1="b", join_key=2, value2="bb"), + Row(value1="c", join_key=None, value2=None), + ], + ) + + dropped_2 = joined_df.drop(right_df["join_key"]) + self.assertEqual(dropped_2.columns, ["join_key", "value1", "value2"]) + self.assertEqual( + dropped_2.sort("value1").collect(), + [ + 
Row(join_key=1, value1="a", value2="aa"), + Row(join_key=2, value1="b", value2="bb"), + Row(join_key=3, value1="c", value2=None), + ], + ) + def test_with_columns_renamed(self): df = self.spark.createDataFrame([("Alice", 50), ("Alice", 60)], ["name", "age"]) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 8e3c9b30c61bf..80cb5d8c60876 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -307,6 +307,7 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor ResolveWindowFrame :: ResolveNaturalAndUsingJoin :: ResolveOutputRelation :: + new ResolveDataFrameDropColumns(catalogManager) :: ExtractWindowExpressions :: GlobalAggregates :: ResolveAggregateFunctions :: diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDataFrameDropColumns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDataFrameDropColumns.scala new file mode 100644 index 0000000000000..2642b4a1c5daa --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveDataFrameDropColumns.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.analysis + +import org.apache.spark.sql.catalyst.plans.logical.{DataFrameDropColumns, LogicalPlan, Project} +import org.apache.spark.sql.catalyst.rules.Rule +import org.apache.spark.sql.catalyst.trees.TreePattern.DF_DROP_COLUMNS +import org.apache.spark.sql.connector.catalog.CatalogManager + +/** + * A rule that rewrites DataFrameDropColumns to Project. + * Note that DataFrameDropColumns allows and ignores non-existing columns. + */ +class ResolveDataFrameDropColumns(val catalogManager: CatalogManager) + extends Rule[LogicalPlan] with ColumnResolutionHelper { + + override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning( + _.containsPattern(DF_DROP_COLUMNS)) { + case d: DataFrameDropColumns if d.childrenResolved => + // expressions in dropList can be unresolved, e.g. 
+ // df.drop(col("non-existing-column")) + val dropped = d.dropList.map { + case u: UnresolvedAttribute => + resolveExpressionByPlanChildren(u, d.child) + case e => e + } + val remaining = d.child.output.filterNot(attr => dropped.exists(_.semanticEquals(attr))) + if (remaining.size == d.child.output.size) { + d.child + } else { + Project(remaining, d.child) + } + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 96b67fc52e0d7..0e460706fc5b8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -235,6 +235,20 @@ object Project { } } +case class DataFrameDropColumns(dropList: Seq[Expression], child: LogicalPlan) extends UnaryNode { + override def output: Seq[Attribute] = Nil + + override def maxRows: Option[Long] = child.maxRows + override def maxRowsPerPartition: Option[Long] = child.maxRowsPerPartition + + final override val nodePatterns: Seq[TreePattern] = Seq(DF_DROP_COLUMNS) + + override lazy val resolved: Boolean = false + + override protected def withNewChildInternal(newChild: LogicalPlan): DataFrameDropColumns = + copy(child = newChild) +} + /** * Applies a [[Generator]] to a stream of input rows, combining the * output of each into a new stream of rows. This operation is similar to a `flatMap` in functional diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index b806ebbed52d0..bf7b2db1719f5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -105,6 +105,7 @@ object TreePattern extends Enumeration { val AS_OF_JOIN: Value = Value val COMMAND: Value = Value val CTE: Value = Value + val DF_DROP_COLUMNS: Value = Value val DISTINCT_LIKE: Value = Value val EVAL_PYTHON_UDF: Value = Value val EVAL_PYTHON_UDTF: Value = Value diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index e047b927b9057..f53c6ddaa3880 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -3013,19 +3013,8 @@ class Dataset[T] private[sql]( * @since 3.4.0 */ @scala.annotation.varargs - def drop(col: Column, cols: Column*): DataFrame = { - val allColumns = col +: cols - val expressions = (for (col <- allColumns) yield col match { - case Column(u: UnresolvedAttribute) => - queryExecution.analyzed.resolveQuoted( - u.name, sparkSession.sessionState.analyzer.resolver).getOrElse(u) - case Column(expr: Expression) => expr - }) - val attrs = this.logicalPlan.output - val colsAfterDrop = attrs.filter { attr => - expressions.forall(expression => !attr.semanticEquals(expression)) - }.map(attr => Column(attr)) - select(colsAfterDrop : _*) + def drop(col: Column, cols: Column*): DataFrame = withPlan { + DataFrameDropColumns((col +: cols).map(_.expr), logicalPlan) } /** From 556caeace66bc12ae12ae304ade21fc24c437af9 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Tue, 14 Nov 2023 19:53:50 +0800 Subject: [PATCH 104/521] [SPARK-45882][SQL][3.5] BroadcastHashJoinExec 
propagate partitioning should respect CoalescedHashPartitioning This pr backport https://github.com/apache/spark/pull/43753 to branch-3.5 ### What changes were proposed in this pull request? Add HashPartitioningLike trait and make HashPartitioning and CoalescedHashPartitioning extend it. When we propagate output partiitoning, we should handle HashPartitioningLike instead of HashPartitioning. This pr also changes the BroadcastHashJoinExec to use HashPartitioningLike to avoid regression. ### Why are the changes needed? Avoid unnecessary shuffle exchange. ### Does this PR introduce _any_ user-facing change? yes, avoid regression ### How was this patch tested? add test ### Was this patch authored or co-authored using generative AI tooling? no Closes #43792 from ulysses-you/partitioning-3.5. Authored-by: ulysses-you Signed-off-by: youxiduo --- .../plans/physical/partitioning.scala | 46 +++++++++---------- .../joins/BroadcastHashJoinExec.scala | 11 +++-- .../org/apache/spark/sql/JoinSuite.scala | 28 ++++++++++- 3 files changed, 54 insertions(+), 31 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala index 1eefe65859bdd..211b5a05eb70c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala @@ -258,18 +258,8 @@ case object SinglePartition extends Partitioning { SinglePartitionShuffleSpec } -/** - * Represents a partitioning where rows are split up across partitions based on the hash - * of `expressions`. All rows where `expressions` evaluate to the same values are guaranteed to be - * in the same partition. - * - * Since [[StatefulOpClusteredDistribution]] relies on this partitioning and Spark requires - * stateful operators to retain the same physical partitioning during the lifetime of the query - * (including restart), the result of evaluation on `partitionIdExpression` must be unchanged - * across Spark versions. Violation of this requirement may bring silent correctness issue. - */ -case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) - extends Expression with Partitioning with Unevaluable { +trait HashPartitioningLike extends Expression with Partitioning with Unevaluable { + def expressions: Seq[Expression] override def children: Seq[Expression] = expressions override def nullable: Boolean = false @@ -294,6 +284,20 @@ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) } } } +} + +/** + * Represents a partitioning where rows are split up across partitions based on the hash + * of `expressions`. All rows where `expressions` evaluate to the same values are guaranteed to be + * in the same partition. + * + * Since [[StatefulOpClusteredDistribution]] relies on this partitioning and Spark requires + * stateful operators to retain the same physical partitioning during the lifetime of the query + * (including restart), the result of evaluation on `partitionIdExpression` must be unchanged + * across Spark versions. Violation of this requirement may bring silent correctness issue. 
+ */ +case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) + extends HashPartitioningLike { override def createShuffleSpec(distribution: ClusteredDistribution): ShuffleSpec = HashShuffleSpec(this, distribution) @@ -306,7 +310,6 @@ case class HashPartitioning(expressions: Seq[Expression], numPartitions: Int) override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): HashPartitioning = copy(expressions = newChildren) - } case class CoalescedBoundary(startReducerIndex: Int, endReducerIndex: Int) @@ -316,25 +319,18 @@ case class CoalescedBoundary(startReducerIndex: Int, endReducerIndex: Int) * fewer number of partitions. */ case class CoalescedHashPartitioning(from: HashPartitioning, partitions: Seq[CoalescedBoundary]) - extends Expression with Partitioning with Unevaluable { - - override def children: Seq[Expression] = from.expressions - override def nullable: Boolean = from.nullable - override def dataType: DataType = from.dataType + extends HashPartitioningLike { - override def satisfies0(required: Distribution): Boolean = from.satisfies0(required) + override def expressions: Seq[Expression] = from.expressions override def createShuffleSpec(distribution: ClusteredDistribution): ShuffleSpec = CoalescedHashShuffleSpec(from.createShuffleSpec(distribution), partitions) - override protected def withNewChildrenInternal( - newChildren: IndexedSeq[Expression]): CoalescedHashPartitioning = - copy(from = from.copy(expressions = newChildren)) - override val numPartitions: Int = partitions.length - override def toString: String = from.toString - override def sql: String = from.sql + override protected def withNewChildrenInternal( + newChildren: IndexedSeq[Expression]): CoalescedHashPartitioning = + copy(from = from.copy(expressions = newChildren)) } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala index 9f9f874314639..b82cee2c0fbe7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight, BuildSide} import org.apache.spark.sql.catalyst.plans._ -import org.apache.spark.sql.catalyst.plans.physical.{BroadcastDistribution, Distribution, HashPartitioning, Partitioning, PartitioningCollection, UnspecifiedDistribution} +import org.apache.spark.sql.catalyst.plans.physical.{BroadcastDistribution, Distribution, HashPartitioningLike, Partitioning, PartitioningCollection, UnspecifiedDistribution} import org.apache.spark.sql.execution.{CodegenSupport, SparkPlan} import org.apache.spark.sql.execution.metric.SQLMetrics @@ -73,7 +73,7 @@ case class BroadcastHashJoinExec( joinType match { case _: InnerLike if conf.broadcastHashJoinOutputPartitioningExpandLimit > 0 => streamedPlan.outputPartitioning match { - case h: HashPartitioning => expandOutputPartitioning(h) + case h: HashPartitioningLike => expandOutputPartitioning(h) case c: PartitioningCollection => expandOutputPartitioning(c) case other => other } @@ -99,7 +99,7 @@ case class BroadcastHashJoinExec( private def expandOutputPartitioning( partitioning: PartitioningCollection): PartitioningCollection = { 
PartitioningCollection(partitioning.partitionings.flatMap { - case h: HashPartitioning => expandOutputPartitioning(h).partitionings + case h: HashPartitioningLike => expandOutputPartitioning(h).partitionings case c: PartitioningCollection => Seq(expandOutputPartitioning(c)) case other => Seq(other) }) @@ -111,11 +111,12 @@ case class BroadcastHashJoinExec( // the expanded partitioning will have the following expressions: // Seq("a", "b", "c"), Seq("a", "b", "y"), Seq("a", "x", "c"), Seq("a", "x", "y"). // The expanded expressions are returned as PartitioningCollection. - private def expandOutputPartitioning(partitioning: HashPartitioning): PartitioningCollection = { + private def expandOutputPartitioning( + partitioning: HashPartitioningLike): PartitioningCollection = { PartitioningCollection(partitioning.multiTransformDown { case e: Expression if streamedKeyToBuildKeyMapping.contains(e.canonicalized) => e +: streamedKeyToBuildKeyMapping(e.canonicalized) - }.asInstanceOf[Stream[HashPartitioning]] + }.asInstanceOf[Stream[HashPartitioningLike]] .take(conf.broadcastHashJoinOutputPartitioningExpandLimit)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 14f1fb27906a1..9dcf7ec29048d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.optimizer.{BuildLeft, BuildRight} import org.apache.spark.sql.catalyst.plans.logical.{Filter, HintInfo, Join, JoinHint, NO_BROADCAST_AND_REPLICATION} import org.apache.spark.sql.execution.{BinaryExecNode, FilterExec, ProjectExec, SortExec, SparkPlan, WholeStageCodegenExec} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper -import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.exchange.{ShuffleExchangeExec, ShuffleExchangeLike} import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.execution.python.BatchEvalPythonExec import org.apache.spark.sql.internal.SQLConf @@ -1729,4 +1729,30 @@ class JoinSuite extends QueryTest with SharedSparkSession with AdaptiveSparkPlan checkAnswer(joined, expected) } + + test("SPARK-45882: BroadcastHashJoinExec propagate partitioning should respect " + + "CoalescedHashPartitioning") { + val cached = spark.sql( + """ + |select /*+ broadcast(testData) */ key, value, a + |from testData join ( + | select a from testData2 group by a + |)tmp on key = a + |""".stripMargin).cache() + try { + val df = cached.groupBy("key").count() + val expected = Seq(Row(1, 1), Row(2, 1), Row(3, 1)) + assert(find(df.queryExecution.executedPlan) { + case _: ShuffleExchangeLike => true + case _ => false + }.size == 1, df.queryExecution) + checkAnswer(df, expected) + assert(find(df.queryExecution.executedPlan) { + case _: ShuffleExchangeLike => true + case _ => false + }.isEmpty, df.queryExecution) + } finally { + cached.unpersist() + } + } } From 4ca65c69a33da33f66969477bc8a6f88154ed305 Mon Sep 17 00:00:00 2001 From: Maryann Xue Date: Tue, 14 Nov 2023 08:51:26 -0800 Subject: [PATCH 105/521] [SPARK-45592][SPARK-45282][SQL] Correctness issue in AQE with InMemoryTableScanExec ### What changes were proposed in this pull request? This PR fixes an correctness issue while enabling AQE for SQL Cache. 
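A minimal sketch of the kind of query that can hit this (not part of the patch; it assumes an active SparkSession named `spark` and mirrors the shape of the regression test added to `DatasetSuite` below):

```
// Hedged sketch only — illustrative, not the authoritative reproduction.
import org.apache.spark.sql.functions.{col, min}
import org.apache.spark.storage.StorageLevel
import spark.implicits._

// Cache a DataFrame whose plan ends in a shuffle that AQE may coalesce.
val ee = spark.range(0, 1000, 1, 5).map(l => (l, l - 1)).toDF()
  .persist(StorageLevel.MEMORY_AND_DISK)
ee.count()

// The cached aggregate still advertises hash partitioning on "_2" even if its final
// shuffle was coalesced, so a join on that key can line partitions up incorrectly.
val minNbrs = ee.groupBy("_2").agg(min(col("_1")).as("min_number"))
  .withColumnRenamed("_2", "_1")
  .persist(StorageLevel.MEMORY_AND_DISK)
minNbrs.count()

val joined = ee.join(minNbrs, "_1")
joined.count()  // expected 999; before the fix the result could be incorrect
```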
This issue was caused by AQE coalescing the top-level shuffle in the physical plan of InMemoryTableScan and wrongfully reported the output partitioning of that InMemoryTableScan as HashPartitioning as if it had not been coalesced. The caller query of that InMemoryTableScan in turn failed to align the partitions correctly and output incorrect join results. The fix addresses the issue by disabling coalescing in InMemoryTableScan for shuffles in the final stage. This fix also guarantees that AQE enabled for SQL cache vs. disabled would always be a performance win, since AQE optimizations are applied to all non-top-level stages and meanwhile no extra shuffle would be introduced between the parent query and the cached relation (if coalescing in top-level shuffles of InMemoryTableScan was not disabled, an extra shuffle would end up being added on top of the cached relation when the cache is used in a join query and the partition key matches the join key in order to avoid the correctness issue). ### Why are the changes needed? To fix correctness issue and to avoid potential AQE perf regressions in queries using SQL cache. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added UTs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43760 from maryannxue/spark-45592. Authored-by: Maryann Xue Signed-off-by: Dongjoon Hyun (cherry picked from commit 128f5523194d5241c7b0f08b5be183288128ba16) Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/internal/SQLConf.scala | 9 ++++ .../spark/sql/execution/CacheManager.scala | 5 +- .../adaptive/AdaptiveSparkPlanExec.scala | 8 ++- .../apache/spark/sql/CachedTableSuite.scala | 52 +++++++++++++------ .../org/apache/spark/sql/DatasetSuite.scala | 33 ++++++++---- 5 files changed, 79 insertions(+), 28 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 4ea0cd5bcc126..70bd21ac1709d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -657,6 +657,15 @@ object SQLConf { .booleanConf .createWithDefault(false) + val ADAPTIVE_EXECUTION_APPLY_FINAL_STAGE_SHUFFLE_OPTIMIZATIONS = + buildConf("spark.sql.adaptive.applyFinalStageShuffleOptimizations") + .internal() + .doc("Configures whether adaptive query execution (if enabled) should apply shuffle " + + "coalescing and local shuffle read optimization for the final query stage.") + .version("3.4.2") + .booleanConf + .createWithDefault(true) + val ADAPTIVE_EXECUTION_LOG_LEVEL = buildConf("spark.sql.adaptive.logLevel") .internal() .doc("Configures the log level for adaptive execution logging of plan changes. The value " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index e906c74f8a5ee..9b79865149abd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -402,8 +402,9 @@ class CacheManager extends Logging with AdaptiveSparkPlanHelper { if (session.conf.get(SQLConf.CAN_CHANGE_CACHED_PLAN_OUTPUT_PARTITIONING)) { // Bucketed scan only has one time overhead but can have multi-times benefits in cache, // so we always do bucketed scan in a cached plan. 
- SparkSession.getOrCloneSessionWithConfigsOff( - session, SQLConf.AUTO_BUCKETED_SCAN_ENABLED :: Nil) + SparkSession.getOrCloneSessionWithConfigsOff(session, + SQLConf.ADAPTIVE_EXECUTION_APPLY_FINAL_STAGE_SHUFFLE_OPTIMIZATIONS :: + SQLConf.AUTO_BUCKETED_SCAN_ENABLED :: Nil) } else { SparkSession.getOrCloneSessionWithConfigsOff(session, forceDisableConfigs) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala index 36895b17aa847..fa671c8faf8b3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala @@ -159,7 +159,13 @@ case class AdaptiveSparkPlanExec( ) private def optimizeQueryStage(plan: SparkPlan, isFinalStage: Boolean): SparkPlan = { - val optimized = queryStageOptimizerRules.foldLeft(plan) { case (latestPlan, rule) => + val rules = if (isFinalStage && + !conf.getConf(SQLConf.ADAPTIVE_EXECUTION_APPLY_FINAL_STAGE_SHUFFLE_OPTIMIZATIONS)) { + queryStageOptimizerRules.filterNot(_.isInstanceOf[AQEShuffleReadRule]) + } else { + queryStageOptimizerRules + } + val optimized = rules.foldLeft(plan) { case (latestPlan, rule) => val applied = rule.apply(latestPlan) val result = rule match { case _: AQEShuffleReadRule if !applied.fastEquals(latestPlan) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala index 1e4a67347f5b1..8331a3c10fc97 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala @@ -29,7 +29,7 @@ import org.apache.commons.io.FileUtils import org.apache.spark.CleanerListener import org.apache.spark.executor.DataReadMethod._ import org.apache.spark.executor.DataReadMethod.DataReadMethod -import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} +import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent, SparkListenerJobStart} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.TempTableAlreadyExistsException import org.apache.spark.sql.catalyst.expressions.SubqueryExpression @@ -39,6 +39,7 @@ import org.apache.spark.sql.execution.{ColumnarToRowExec, ExecSubqueryExpression import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, AQEPropagateEmptyRelation} import org.apache.spark.sql.execution.columnar._ import org.apache.spark.sql.execution.exchange.ShuffleExchangeExec +import org.apache.spark.sql.execution.ui.SparkListenerSQLAdaptiveExecutionUpdate import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} @@ -1623,23 +1624,44 @@ class CachedTableSuite extends QueryTest with SQLTestUtils SQLConf.COALESCE_PARTITIONS_MIN_PARTITION_NUM.key -> "1", SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true") { - withTempView("t1", "t2", "t3") { - withSQLConf(SQLConf.CAN_CHANGE_CACHED_PLAN_OUTPUT_PARTITIONING.key -> "false") { - sql("CACHE TABLE t1 as SELECT /*+ REPARTITION */ * FROM values(1) as t(c)") - assert(spark.table("t1").rdd.partitions.length == 2) + var finalPlan = "" + val listener = new SparkListener { + override def onOtherEvent(event: SparkListenerEvent): Unit = { + event match { + case 
SparkListenerSQLAdaptiveExecutionUpdate(_, physicalPlanDesc, sparkPlanInfo) => + if (sparkPlanInfo.simpleString.startsWith( + "AdaptiveSparkPlan isFinalPlan=true")) { + finalPlan = physicalPlanDesc + } + case _ => // ignore other events + } } + } - withSQLConf(SQLConf.CAN_CHANGE_CACHED_PLAN_OUTPUT_PARTITIONING.key -> "true") { - assert(spark.table("t1").rdd.partitions.length == 2) - sql("CACHE TABLE t2 as SELECT /*+ REPARTITION */ * FROM values(2) as t(c)") - assert(spark.table("t2").rdd.partitions.length == 1) - } + withTempView("t0", "t1", "t2") { + try { + spark.range(10).write.saveAsTable("t0") + spark.sparkContext.listenerBus.waitUntilEmpty() + spark.sparkContext.addSparkListener(listener) - withSQLConf(SQLConf.CAN_CHANGE_CACHED_PLAN_OUTPUT_PARTITIONING.key -> "false") { - assert(spark.table("t1").rdd.partitions.length == 2) - assert(spark.table("t2").rdd.partitions.length == 1) - sql("CACHE TABLE t3 as SELECT /*+ REPARTITION */ * FROM values(3) as t(c)") - assert(spark.table("t3").rdd.partitions.length == 2) + withSQLConf(SQLConf.CAN_CHANGE_CACHED_PLAN_OUTPUT_PARTITIONING.key -> "false") { + sql("CACHE TABLE t1 as SELECT /*+ REPARTITION */ * FROM (" + + "SELECT distinct (id+1) FROM t0)") + assert(spark.table("t1").rdd.partitions.length == 2) + spark.sparkContext.listenerBus.waitUntilEmpty() + assert(finalPlan.nonEmpty && !finalPlan.contains("coalesced")) + } + + finalPlan = "" // reset finalPlan + withSQLConf(SQLConf.CAN_CHANGE_CACHED_PLAN_OUTPUT_PARTITIONING.key -> "true") { + sql("CACHE TABLE t2 as SELECT /*+ REPARTITION */ * FROM (" + + "SELECT distinct (id-1) FROM t0)") + assert(spark.table("t2").rdd.partitions.length == 2) + spark.sparkContext.listenerBus.waitUntilEmpty() + assert(finalPlan.nonEmpty && finalPlan.contains("coalesced")) + } + } finally { + spark.sparkContext.removeSparkListener(listener) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 0878ae134e9d4..c2fe31520acf6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -2550,16 +2550,29 @@ class DatasetSuite extends QueryTest } test("SPARK-45592: Coaleasced shuffle read is not compatible with hash partitioning") { - val ee = spark.range(0, 1000000, 1, 5).map(l => (l, l)).toDF() - .persist(org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK) - ee.count() - - val minNbrs1 = ee - .groupBy("_1").agg(min(col("_2")).as("min_number")) - .persist(org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK) - - val join = ee.join(minNbrs1, "_1") - assert(join.count() == 1000000) + withSQLConf(SQLConf.CAN_CHANGE_CACHED_PLAN_OUTPUT_PARTITIONING.key -> "true", + SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1", + SQLConf.SHUFFLE_PARTITIONS.key -> "20", + SQLConf.ADVISORY_PARTITION_SIZE_IN_BYTES.key -> "2000") { + val ee = spark.range(0, 1000, 1, 5).map(l => (l, l - 1)).toDF() + .persist(org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK) + ee.count() + + // `minNbrs1` will start with 20 partitions and without the fix would coalesce to ~10 + // partitions. 
+ val minNbrs1 = ee + .groupBy("_2").agg(min(col("_1")).as("min_number")) + .select(col("_2") as "_1", col("min_number")) + .persist(org.apache.spark.storage.StorageLevel.MEMORY_AND_DISK) + minNbrs1.count() + + // shuffle on `ee` will start with 2 partitions, smaller than `minNbrs1`'s partition num, + // and `EnsureRequirements` will change its partition num to `minNbrs1`'s partition num. + withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> "5") { + val join = ee.join(minNbrs1, "_1") + assert(join.count() == 999) + } + } } } From 41a7a4a3233772003aef380428acd9eaf39b9a93 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Wed, 15 Nov 2023 14:27:34 +0800 Subject: [PATCH 106/521] [SPARK-43393][SQL] Address sequence expression overflow bug Spark has a (long-standing) overflow bug in the `sequence` expression. Consider the following operations: ``` spark.sql("CREATE TABLE foo (l LONG);") spark.sql(s"INSERT INTO foo VALUES (${Long.MaxValue});") spark.sql("SELECT sequence(0, l) FROM foo;").collect() ``` The result of these operations will be: ``` Array[org.apache.spark.sql.Row] = Array([WrappedArray()]) ``` an unintended consequence of overflow. The sequence is applied to values `0` and `Long.MaxValue` with a step size of `1` which uses a length computation defined [here](https://github.com/apache/spark/blob/16411188c7ba6cb19c46a2bd512b2485a4c03e2c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L3451). In this calculation, with `start = 0`, `stop = Long.MaxValue`, and `step = 1`, the calculated `len` overflows to `Long.MinValue`. The computation, in binary looks like: ``` 0111111111111111111111111111111111111111111111111111111111111111 - 0000000000000000000000000000000000000000000000000000000000000000 ------------------------------------------------------------------ 0111111111111111111111111111111111111111111111111111111111111111 / 0000000000000000000000000000000000000000000000000000000000000001 ------------------------------------------------------------------ 0111111111111111111111111111111111111111111111111111111111111111 + 0000000000000000000000000000000000000000000000000000000000000001 ------------------------------------------------------------------ 1000000000000000000000000000000000000000000000000000000000000000 ``` The following [check](https://github.com/apache/spark/blob/16411188c7ba6cb19c46a2bd512b2485a4c03e2c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L3454) passes as the negative `Long.MinValue` is still `<= MAX_ROUNDED_ARRAY_LENGTH`. The following cast to `toInt` uses this representation and [truncates the upper bits](https://github.com/apache/spark/blob/16411188c7ba6cb19c46a2bd512b2485a4c03e2c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L3457) resulting in an empty length of `0`. Other overflows are similarly problematic. This PR addresses the issue by checking numeric operations in the length computation for overflow. There is a correctness bug from overflow in the `sequence` expression. No. Tests added in `CollectionExpressionsSuite.scala`. Closes #41072 from thepinetree/spark-sequence-overflow. 
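To make the arithmetic above concrete, here is a small self-contained illustration (not part of the patch) of the unchecked overflow and of the checked computation that the new `Sequence.sequenceLength` performs; it is plain Scala using only `java.lang.Math`:

```
// Hedged illustration only; it mirrors the fixed logic but is not the Spark source itself.
val start = 0L
val stop  = Long.MaxValue
val step  = 1L

// Unchecked (old) length formula: 1 + (stop - start) / step wraps to Long.MinValue,
// which then slips past the `len <= MAX_ROUNDED_ARRAY_LENGTH` check and truncates to 0.
val uncheckedLen = 1L + (stop - start) / step          // == Long.MinValue

// Checked (new) computation: Math.subtractExact / Math.addExact throw
// ArithmeticException on overflow, which the patch converts into a proper
// COLLECTION_SIZE_LIMIT_EXCEEDED error instead of an empty array.
val checkedLen: Option[Long] =
  try Some(Math.addExact(1L, Math.subtractExact(stop, start) / step))
  catch { case _: ArithmeticException => None }        // None here: overflow was caught
```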
Authored-by: Deepayan Patra Signed-off-by: Wenchen Fan (cherry picked from commit afc4c49927cb7f0f2a7f24a42c4fe497796dd9e3) Signed-off-by: Wenchen Fan --- .../expressions/collectionOperations.scala | 48 +++++++++++----- .../CollectionExpressionsSuite.scala | 56 +++++++++++++++++-- 2 files changed, 84 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index ade4a6c5be722..c3c235fba677e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -22,6 +22,8 @@ import java.util.Comparator import scala.collection.mutable import scala.reflect.ClassTag +import org.apache.spark.QueryContext +import org.apache.spark.SparkException.internalError import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedAttribute, UnresolvedSeed} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch @@ -40,7 +42,6 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SQLOpenHashSet import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.array.ByteArrayMethods -import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH import org.apache.spark.unsafe.types.{ByteArray, CalendarInterval, UTF8String} /** @@ -3080,6 +3081,34 @@ case class Sequence( } object Sequence { + private def prettyName: String = "sequence" + + def sequenceLength(start: Long, stop: Long, step: Long): Int = { + try { + val delta = Math.subtractExact(stop, start) + if (delta == Long.MinValue && step == -1L) { + // We must special-case division of Long.MinValue by -1 to catch potential unchecked + // overflow in next operation. Division does not have a builtin overflow check. We + // previously special-case div-by-zero. + throw new ArithmeticException("Long overflow (Long.MinValue / -1)") + } + val len = if (stop == start) 1L else Math.addExact(1L, (delta / step)) + if (len > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { + throw QueryExecutionErrors.createArrayWithElementsExceedLimitError(prettyName, len) + } + len.toInt + } catch { + // We handle overflows in the previous try block by raising an appropriate exception. + case _: ArithmeticException => + val safeLen = + BigInt(1) + (BigInt(stop) - BigInt(start)) / BigInt(step) + if (safeLen > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { + throw QueryExecutionErrors.createArrayWithElementsExceedLimitError(prettyName, safeLen) + } + throw internalError("Unreachable code reached.") + case e: Exception => throw e + } + } private type LessThanOrEqualFn = (Any, Any) => Boolean @@ -3451,13 +3480,7 @@ object Sequence { || (estimatedStep == num.zero && start == stop), s"Illegal sequence boundaries: $start to $stop by $step") - val len = if (start == stop) 1L else 1L + (stop.toLong - start.toLong) / estimatedStep.toLong - - require( - len <= MAX_ROUNDED_ARRAY_LENGTH, - s"Too long sequence: $len. 
Should be <= $MAX_ROUNDED_ARRAY_LENGTH") - - len.toInt + sequenceLength(start.toLong, stop.toLong, estimatedStep.toLong) } private def genSequenceLengthCode( @@ -3467,7 +3490,7 @@ object Sequence { step: String, estimatedStep: String, len: String): String = { - val longLen = ctx.freshName("longLen") + val calcFn = classOf[Sequence].getName + ".sequenceLength" s""" |if (!(($estimatedStep > 0 && $start <= $stop) || | ($estimatedStep < 0 && $start >= $stop) || @@ -3475,12 +3498,7 @@ object Sequence { | throw new IllegalArgumentException( | "Illegal sequence boundaries: " + $start + " to " + $stop + " by " + $step); |} - |long $longLen = $stop == $start ? 1L : 1L + ((long) $stop - $start) / $estimatedStep; - |if ($longLen > $MAX_ROUNDED_ARRAY_LENGTH) { - | throw new IllegalArgumentException( - | "Too long sequence: " + $longLen + ". Should be <= $MAX_ROUNDED_ARRAY_LENGTH"); - |} - |int $len = (int) $longLen; + |int $len = $calcFn((long) $start, (long) $stop, (long) $estimatedStep); """.stripMargin } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 1787f6ac72dd4..d001006c58cf1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{outstandingZoneIds, import org.apache.spark.sql.catalyst.util.IntervalUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH +import org.apache.spark.unsafe.array.ByteArrayMethods import org.apache.spark.unsafe.types.UTF8String class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -769,10 +769,6 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper // test sequence boundaries checking - checkExceptionInExpression[IllegalArgumentException]( - new Sequence(Literal(Int.MinValue), Literal(Int.MaxValue), Literal(1)), - EmptyRow, s"Too long sequence: 4294967296. 
Should be <= $MAX_ROUNDED_ARRAY_LENGTH") - checkExceptionInExpression[IllegalArgumentException]( new Sequence(Literal(1), Literal(2), Literal(0)), EmptyRow, "boundaries: 1 to 2 by 0") checkExceptionInExpression[IllegalArgumentException]( @@ -782,6 +778,56 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkExceptionInExpression[IllegalArgumentException]( new Sequence(Literal(1), Literal(2), Literal(-1)), EmptyRow, "boundaries: 1 to 2 by -1") + // SPARK-43393: test Sequence overflow checking + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(Int.MinValue), Literal(Int.MaxValue), Literal(1)), + errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", + parameters = Map( + "numberOfElements" -> (BigInt(Int.MaxValue) - BigInt { Int.MinValue } + 1).toString, + "functionName" -> toSQLId("sequence"), + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), + "parameter" -> toSQLId("count"))) + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(0L), Literal(Long.MaxValue), Literal(1L)), + errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", + parameters = Map( + "numberOfElements" -> (BigInt(Long.MaxValue) + 1).toString, + "functionName" -> toSQLId("sequence"), + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), + "parameter" -> toSQLId("count"))) + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(0L), Literal(Long.MinValue), Literal(-1L)), + errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", + parameters = Map( + "numberOfElements" -> ((0 - BigInt(Long.MinValue)) + 1).toString(), + "functionName" -> toSQLId("sequence"), + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), + "parameter" -> toSQLId("count"))) + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(Long.MinValue), Literal(Long.MaxValue), Literal(1L)), + errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", + parameters = Map( + "numberOfElements" -> (BigInt(Long.MaxValue) - BigInt { Long.MinValue } + 1).toString, + "functionName" -> toSQLId("sequence"), + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), + "parameter" -> toSQLId("count"))) + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(Long.MaxValue), Literal(Long.MinValue), Literal(-1L)), + errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", + parameters = Map( + "numberOfElements" -> (BigInt(Long.MaxValue) - BigInt { Long.MinValue } + 1).toString, + "functionName" -> toSQLId("sequence"), + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), + "parameter" -> toSQLId("count"))) + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(Long.MaxValue), Literal(-1L), Literal(-1L)), + errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", + parameters = Map( + "numberOfElements" -> (BigInt(Long.MaxValue) - BigInt { -1L } + 1).toString, + "functionName" -> toSQLId("sequence"), + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), + "parameter" -> toSQLId("count"))) + // test sequence with one element (zero step or equal start and stop) checkEvaluation(new Sequence(Literal(1), Literal(1), Literal(-1)), Seq(1)) From e38310c74e6cae8c8c8489ffcbceb80ed37a7cae Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 15 Nov 2023 09:12:42 -0800 Subject: [PATCH 107/521] Revert "[SPARK-43393][SQL] Address sequence expression overflow bug" This reverts 
commit 41a7a4a3233772003aef380428acd9eaf39b9a93. --- .../expressions/collectionOperations.scala | 48 +++++----------- .../CollectionExpressionsSuite.scala | 56 ++----------------- 2 files changed, 20 insertions(+), 84 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index c3c235fba677e..ade4a6c5be722 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -22,8 +22,6 @@ import java.util.Comparator import scala.collection.mutable import scala.reflect.ClassTag -import org.apache.spark.QueryContext -import org.apache.spark.SparkException.internalError import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedAttribute, UnresolvedSeed} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch @@ -42,6 +40,7 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SQLOpenHashSet import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.array.ByteArrayMethods +import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH import org.apache.spark.unsafe.types.{ByteArray, CalendarInterval, UTF8String} /** @@ -3081,34 +3080,6 @@ case class Sequence( } object Sequence { - private def prettyName: String = "sequence" - - def sequenceLength(start: Long, stop: Long, step: Long): Int = { - try { - val delta = Math.subtractExact(stop, start) - if (delta == Long.MinValue && step == -1L) { - // We must special-case division of Long.MinValue by -1 to catch potential unchecked - // overflow in next operation. Division does not have a builtin overflow check. We - // previously special-case div-by-zero. - throw new ArithmeticException("Long overflow (Long.MinValue / -1)") - } - val len = if (stop == start) 1L else Math.addExact(1L, (delta / step)) - if (len > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { - throw QueryExecutionErrors.createArrayWithElementsExceedLimitError(prettyName, len) - } - len.toInt - } catch { - // We handle overflows in the previous try block by raising an appropriate exception. - case _: ArithmeticException => - val safeLen = - BigInt(1) + (BigInt(stop) - BigInt(start)) / BigInt(step) - if (safeLen > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { - throw QueryExecutionErrors.createArrayWithElementsExceedLimitError(prettyName, safeLen) - } - throw internalError("Unreachable code reached.") - case e: Exception => throw e - } - } private type LessThanOrEqualFn = (Any, Any) => Boolean @@ -3480,7 +3451,13 @@ object Sequence { || (estimatedStep == num.zero && start == stop), s"Illegal sequence boundaries: $start to $stop by $step") - sequenceLength(start.toLong, stop.toLong, estimatedStep.toLong) + val len = if (start == stop) 1L else 1L + (stop.toLong - start.toLong) / estimatedStep.toLong + + require( + len <= MAX_ROUNDED_ARRAY_LENGTH, + s"Too long sequence: $len. 
Should be <= $MAX_ROUNDED_ARRAY_LENGTH") + + len.toInt } private def genSequenceLengthCode( @@ -3490,7 +3467,7 @@ object Sequence { step: String, estimatedStep: String, len: String): String = { - val calcFn = classOf[Sequence].getName + ".sequenceLength" + val longLen = ctx.freshName("longLen") s""" |if (!(($estimatedStep > 0 && $start <= $stop) || | ($estimatedStep < 0 && $start >= $stop) || @@ -3498,7 +3475,12 @@ object Sequence { | throw new IllegalArgumentException( | "Illegal sequence boundaries: " + $start + " to " + $stop + " by " + $step); |} - |int $len = $calcFn((long) $start, (long) $stop, (long) $estimatedStep); + |long $longLen = $stop == $start ? 1L : 1L + ((long) $stop - $start) / $estimatedStep; + |if ($longLen > $MAX_ROUNDED_ARRAY_LENGTH) { + | throw new IllegalArgumentException( + | "Too long sequence: " + $longLen + ". Should be <= $MAX_ROUNDED_ARRAY_LENGTH"); + |} + |int $len = (int) $longLen; """.stripMargin } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index d001006c58cf1..1787f6ac72dd4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{outstandingZoneIds, import org.apache.spark.sql.catalyst.util.IntervalUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.array.ByteArrayMethods +import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH import org.apache.spark.unsafe.types.UTF8String class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -769,6 +769,10 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper // test sequence boundaries checking + checkExceptionInExpression[IllegalArgumentException]( + new Sequence(Literal(Int.MinValue), Literal(Int.MaxValue), Literal(1)), + EmptyRow, s"Too long sequence: 4294967296. 
Should be <= $MAX_ROUNDED_ARRAY_LENGTH") + checkExceptionInExpression[IllegalArgumentException]( new Sequence(Literal(1), Literal(2), Literal(0)), EmptyRow, "boundaries: 1 to 2 by 0") checkExceptionInExpression[IllegalArgumentException]( @@ -778,56 +782,6 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkExceptionInExpression[IllegalArgumentException]( new Sequence(Literal(1), Literal(2), Literal(-1)), EmptyRow, "boundaries: 1 to 2 by -1") - // SPARK-43393: test Sequence overflow checking - checkErrorInExpression[SparkRuntimeException]( - new Sequence(Literal(Int.MinValue), Literal(Int.MaxValue), Literal(1)), - errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", - parameters = Map( - "numberOfElements" -> (BigInt(Int.MaxValue) - BigInt { Int.MinValue } + 1).toString, - "functionName" -> toSQLId("sequence"), - "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), - "parameter" -> toSQLId("count"))) - checkErrorInExpression[SparkRuntimeException]( - new Sequence(Literal(0L), Literal(Long.MaxValue), Literal(1L)), - errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", - parameters = Map( - "numberOfElements" -> (BigInt(Long.MaxValue) + 1).toString, - "functionName" -> toSQLId("sequence"), - "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), - "parameter" -> toSQLId("count"))) - checkErrorInExpression[SparkRuntimeException]( - new Sequence(Literal(0L), Literal(Long.MinValue), Literal(-1L)), - errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", - parameters = Map( - "numberOfElements" -> ((0 - BigInt(Long.MinValue)) + 1).toString(), - "functionName" -> toSQLId("sequence"), - "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), - "parameter" -> toSQLId("count"))) - checkErrorInExpression[SparkRuntimeException]( - new Sequence(Literal(Long.MinValue), Literal(Long.MaxValue), Literal(1L)), - errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", - parameters = Map( - "numberOfElements" -> (BigInt(Long.MaxValue) - BigInt { Long.MinValue } + 1).toString, - "functionName" -> toSQLId("sequence"), - "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), - "parameter" -> toSQLId("count"))) - checkErrorInExpression[SparkRuntimeException]( - new Sequence(Literal(Long.MaxValue), Literal(Long.MinValue), Literal(-1L)), - errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", - parameters = Map( - "numberOfElements" -> (BigInt(Long.MaxValue) - BigInt { Long.MinValue } + 1).toString, - "functionName" -> toSQLId("sequence"), - "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), - "parameter" -> toSQLId("count"))) - checkErrorInExpression[SparkRuntimeException]( - new Sequence(Literal(Long.MaxValue), Literal(-1L), Literal(-1L)), - errorClass = "COLLECTION_SIZE_LIMIT_EXCEEDED.PARAMETER", - parameters = Map( - "numberOfElements" -> (BigInt(Long.MaxValue) - BigInt { -1L } + 1).toString, - "functionName" -> toSQLId("sequence"), - "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString(), - "parameter" -> toSQLId("count"))) - // test sequence with one element (zero step or equal start and stop) checkEvaluation(new Sequence(Literal(1), Literal(1), Literal(-1)), Seq(1)) From a9f95e8203bede86462e681bb7a3e6123b8c00a2 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 15 Nov 2023 14:12:36 -0800 Subject: [PATCH 108/521] [SPARK-45934][DOCS] Fix `Spark Standalone` documentation table layout MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR fixes `Spark Standalone` documentation table layout. **BEFORE** - https://spark.apache.org/docs/3.5.0/spark-standalone.html **AFTER** - Spark Standalone Screenshot 2023-11-15 at 2 40 59 AM No. Manual review. No. Closes #43814 from dongjoon-hyun/SPARK-45934. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit e8c2a590f99d8c87968c79960e6b69191f28b420) Signed-off-by: Dongjoon Hyun --- docs/spark-standalone.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 3e87edad0aadd..ebda8d897eae9 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -254,7 +254,7 @@ SPARK_MASTER_OPTS supports the following system properties: 0.6.2 - spark.worker.resource.{resourceName}.amount + spark.worker.resource.{name}.amount (none) Amount of a particular resource to use on the worker. @@ -262,7 +262,7 @@ SPARK_MASTER_OPTS supports the following system properties: 3.0.0 - spark.worker.resource.{resourceName}.discoveryScript + spark.worker.resource.{name}.discoveryScript (none) Path to resource discovery script, which is used to find a particular resource while worker starting up. @@ -275,8 +275,10 @@ SPARK_MASTER_OPTS supports the following system properties: (none) Path to resources file which is used to find various resources while worker starting up. - The content of resources file should be formatted like - [{"id":{"componentName": "spark.worker","resourceName":"gpu"},"addresses":["0","1","2"]}]. + The content of resources file should be formatted like + [{"id":{"componentName": + "spark.worker", "resourceName":"gpu"}, + "addresses":["0","1","2"]}]. If a particular resource is not found in the resources file, the discovery script would be used to find that resource. If the discovery script also does not find the resources, the worker will fail to start up. From 44bd909ef9e6f4d5419b5757a265fa9ead001cbb Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 16 Nov 2023 00:52:48 -0800 Subject: [PATCH 109/521] [SPARK-45764][PYTHON][DOCS][3.5] Make code block copyable ### What changes were proposed in this pull request? The pr aims to make code block `copyable `in pyspark docs. Backport above to `branch 3.5`. Master branch pr: https://github.com/apache/spark/pull/43799 ### Why are the changes needed? Improving the usability of PySpark documents. ### Does this PR introduce _any_ user-facing change? Yes, users will be able to easily copy code block in pyspark docs. ### How was this patch tested? - Manually test. - Pass GA. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43827 from panbingkun/branch-3.5_SPARK-45764. 
Authored-by: panbingkun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 2 +- LICENSE | 5 -- dev/create-release/spark-rm/Dockerfile | 2 +- dev/requirements.txt | 1 + licenses/LICENSE-copybutton.txt | 49 ----------------- python/docs/source/_static/copybutton.js | 67 ------------------------ python/docs/source/conf.py | 7 +-- 7 files changed, 7 insertions(+), 126 deletions(-) delete mode 100644 licenses/LICENSE-copybutton.txt delete mode 100644 python/docs/source/_static/copybutton.js diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 674e59508510c..f202a7d49c9a2 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -678,7 +678,7 @@ jobs: # See also https://issues.apache.org/jira/browse/SPARK-35375. # Pin the MarkupSafe to 2.0.1 to resolve the CI error. # See also https://issues.apache.org/jira/browse/SPARK-38279. - python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' + python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme 'sphinx-copybutton==0.5.2' nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 diff --git a/LICENSE b/LICENSE index 1735d3208f2e2..74686d7ffa388 100644 --- a/LICENSE +++ b/LICENSE @@ -218,11 +218,6 @@ docs/js/vendor/bootstrap.js connector/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaReporter.java -Python Software Foundation License ----------------------------------- - -python/docs/source/_static/copybutton.js - BSD 3-Clause ------------ diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index 85155b67bd5a3..cd57226f5e017 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -42,7 +42,7 @@ ARG APT_INSTALL="apt-get install --no-install-recommends -y" # We should use the latest Sphinx version once this is fixed. # TODO(SPARK-35375): Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. -ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.20.3 pydata_sphinx_theme==0.8.0 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3 twine==3.4.1 sphinx-plotly-directive==0.1.3 pandas==1.5.3 pyarrow==3.0.0 plotly==5.4.0 markupsafe==2.0.1 docutils<0.17 grpcio==1.56.0 protobuf==4.21.6 grpcio-status==1.56.0 googleapis-common-protos==1.56.4" +ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.20.3 pydata_sphinx_theme==0.8.0 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3 twine==3.4.1 sphinx-plotly-directive==0.1.3 sphinx-copybutton==0.5.2 pandas==1.5.3 pyarrow==3.0.0 plotly==5.4.0 markupsafe==2.0.1 docutils<0.17 grpcio==1.56.0 protobuf==4.21.6 grpcio-status==1.56.0 googleapis-common-protos==1.56.4" ARG GEM_PKGS="bundler:2.3.8" # Install extra needed repos and refresh. diff --git a/dev/requirements.txt b/dev/requirements.txt index 38a9b2447108c..597417aba1f3d 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -37,6 +37,7 @@ numpydoc jinja2<3.0.0 sphinx<3.1.0 sphinx-plotly-directive +sphinx-copybutton<0.5.3 docutils<0.18.0 # See SPARK-38279. 
markupsafe==2.0.1 diff --git a/licenses/LICENSE-copybutton.txt b/licenses/LICENSE-copybutton.txt deleted file mode 100644 index 45be6b83a53be..0000000000000 --- a/licenses/LICENSE-copybutton.txt +++ /dev/null @@ -1,49 +0,0 @@ -PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 --------------------------------------------- - -1. This LICENSE AGREEMENT is between the Python Software Foundation -("PSF"), and the Individual or Organization ("Licensee") accessing and -otherwise using this software ("Python") in source or binary form and -its associated documentation. - -2. Subject to the terms and conditions of this License Agreement, PSF hereby -grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, -analyze, test, perform and/or display publicly, prepare derivative works, -distribute, and otherwise use Python alone or in any derivative version, -provided, however, that PSF's License Agreement and PSF's notice of copyright, -i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Python Software Foundation; -All Rights Reserved" are retained in Python alone or in any derivative version -prepared by Licensee. - -3. In the event Licensee prepares a derivative work that is based on -or incorporates Python or any part thereof, and wants to make -the derivative work available to others as provided herein, then -Licensee hereby agrees to include in any such work a brief summary of -the changes made to Python. - -4. PSF is making Python available to Licensee on an "AS IS" -basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT -INFRINGE ANY THIRD PARTY RIGHTS. - -5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON -FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS -A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, -OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. - -6. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -7. Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between PSF and -Licensee. This License Agreement does not grant permission to use PSF -trademarks or trade name in a trademark sense to endorse or promote -products or services of Licensee, or any third party. - -8. By copying, installing or otherwise using Python, Licensee -agrees to be bound by the terms and conditions of this License -Agreement. - diff --git a/python/docs/source/_static/copybutton.js b/python/docs/source/_static/copybutton.js deleted file mode 100644 index 896faad3f9df1..0000000000000 --- a/python/docs/source/_static/copybutton.js +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright 2014 PSF. Licensed under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 -// File originates from the cpython source found in Doc/tools/sphinxext/static/copybutton.js - -$(document).ready(function() { - /* Add a [>>>] button on the top-right corner of code samples to hide - * the >>> and ... prompts and the output and thus make the code - * copyable. 
*/ - var div = $('.highlight-python .highlight,' + - '.highlight-default .highlight,' + - '.highlight-python3 .highlight') - var pre = div.find('pre'); - - // get the styles from the current theme - pre.parent().parent().css('position', 'relative'); - var hide_text = 'Hide the prompts and output'; - var show_text = 'Show the prompts and output'; - var border_width = pre.css('border-top-width'); - var border_style = pre.css('border-top-style'); - var border_color = pre.css('border-top-color'); - var button_styles = { - 'cursor':'pointer', 'position': 'absolute', 'top': '0', 'right': '0', - 'border-color': border_color, 'border-style': border_style, - 'border-width': border_width, 'color': border_color, 'text-size': '75%', - 'font-family': 'monospace', 'padding-left': '0.2em', 'padding-right': '0.2em', - 'border-radius': '0 3px 0 0', - 'user-select': 'none' - } - - // create and add the button to all the code blocks that contain >>> - div.each(function(index) { - var jthis = $(this); - if (jthis.find('.gp').length > 0) { - var button = $('>>>'); - button.css(button_styles) - button.attr('title', hide_text); - button.data('hidden', 'false'); - jthis.prepend(button); - } - // tracebacks (.gt) contain bare text elements that need to be - // wrapped in a span to work with .nextUntil() (see later) - jthis.find('pre:has(.gt)').contents().filter(function() { - return ((this.nodeType == 3) && (this.data.trim().length > 0)); - }).wrap(''); - }); - - // define the behavior of the button when it's clicked - $('.copybutton').click(function(e){ - e.preventDefault(); - var button = $(this); - if (button.data('hidden') === 'false') { - // hide the code output - button.parent().find('.go, .gp, .gt').hide(); - button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'hidden'); - button.css('text-decoration', 'line-through'); - button.attr('title', show_text); - button.data('hidden', 'true'); - } else { - // show the code output - button.parent().find('.go, .gp, .gt').show(); - button.next('pre').find('.gt').nextUntil('.gp, .go').css('visibility', 'visible'); - button.css('text-decoration', 'none'); - button.attr('title', hide_text); - button.data('hidden', 'false'); - } - }); -}); - diff --git a/python/docs/source/conf.py b/python/docs/source/conf.py index 0f57cb37ceeb1..a0d087de176ff 100644 --- a/python/docs/source/conf.py +++ b/python/docs/source/conf.py @@ -63,6 +63,7 @@ 'sphinx.ext.viewcode', 'sphinx.ext.mathjax', 'sphinx.ext.autosummary', + 'sphinx_copybutton', 'nbsphinx', # Converts Jupyter Notebook to reStructuredText files for Sphinx. # For ipython directive in reStructuredText files. It is generated by the notebook. 'IPython.sphinxext.ipython_console_highlighting', @@ -70,6 +71,9 @@ 'sphinx_plotly_directive', # For visualize plot result ] +# sphinx copy button +copybutton_exclude = '.linenos, .gp, .go' + # plotly plot directive plotly_include_source = True plotly_html_show_formats = False @@ -416,9 +420,6 @@ # If false, no index is generated. #epub_use_index = True -def setup(app): - # The app.add_javascript() is deprecated. 
- getattr(app, "add_js_file", getattr(app, "add_javascript", None))('copybutton.js') # Skip sample endpoint link (not expected to resolve) linkcheck_ignore = [r'https://kinesis.us-east-1.amazonaws.com'] From b962cb26ed20d695e408958be452f0a947e7e989 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Thu, 16 Nov 2023 18:00:56 +0900 Subject: [PATCH 110/521] [SPARK-45935][PYTHON][DOCS] Fix RST files link substitutions error ### What changes were proposed in this pull request? The pr aims to fix RST files `link substitutions` error. Target branch: branch-3.3, branch-3.4, branch-3.5, master. ### Why are the changes needed? When I was reviewing Python documents, I found that `the actual address` of the link was incorrect, eg: https://spark.apache.org/docs/latest/api/python/getting_started/install.html#installing-from-source image `The ref link url` of `Building Spark`: from `https://spark.apache.org/docs/3.5.0/#downloading` to `https://spark.apache.org/docs/3.5.0/building-spark.html`. We should fix it. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43815 from panbingkun/SPARK-45935. Authored-by: panbingkun Signed-off-by: Hyukjin Kwon (cherry picked from commit 79ccdfa31e282ebe9a82c8f20c703b6ad2ea6bc1) Signed-off-by: Hyukjin Kwon --- python/docs/source/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/docs/source/conf.py b/python/docs/source/conf.py index a0d087de176ff..08a25c5dd0712 100644 --- a/python/docs/source/conf.py +++ b/python/docs/source/conf.py @@ -98,9 +98,9 @@ .. |examples| replace:: Examples .. _examples: https://github.com/apache/spark/tree/{0}/examples/src/main/python .. |downloading| replace:: Downloading -.. _downloading: https://spark.apache.org/docs/{1}/building-spark.html +.. _downloading: https://spark.apache.org/docs/{1}/#downloading .. |building_spark| replace:: Building Spark -.. _building_spark: https://spark.apache.org/docs/{1}/#downloading +.. _building_spark: https://spark.apache.org/docs/{1}/building-spark.html """.format( os.environ.get("GIT_HASH", "master"), os.environ.get("RELEASE_VERSION", "latest"), From f0054c5a10bf388688e7b2914cb639c96ffdd8f3 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Thu, 16 Nov 2023 08:16:20 -0800 Subject: [PATCH 111/521] [SPARK-45920][SQL][3.5] group by ordinal should be idempotent backport https://github.com/apache/spark/pull/43797 ### What changes were proposed in this pull request? GROUP BY ordinal is not idempotent today. If the ordinal points to another integer literal and the plan get analyzed again, we will re-do the ordinal resolution which can lead to wrong result or index out-of-bound error. This PR fixes it by using a hack: if the ordinal points to another integer literal, don't replace the ordinal. ### Why are the changes needed? For advanced users or Spark plugins, they may manipulate the logical plans directly. We need to make the framework more reliable. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? new test ### Was this patch authored or co-authored using generative AI tooling? no Closes #43836 from cloud-fan/3.5-port. 
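To make the failure mode concrete, a hedged illustration follows (not part of the patch; it reuses the `data` test view with columns `a` and `b` from the `group-by-ordinal.sql` golden file updated below):

```
// Hedged illustration only: a SELECT item that is itself an integer literal and is
// referenced by a GROUP BY ordinal.
spark.sql("select a, 5, sum(b) from data group by 1, 2")
// First analysis: ordinal 2 resolves to the second select item, the literal 5.
// If that already-resolved Aggregate is analyzed again, the grouping expression 5 is
// treated as a GROUP BY ordinal once more; with no 5th select item this now fails with
// an ordinal-out-of-range error (or, as described above, a different literal value can
// silently change the grouping and produce wrong results). Keeping the original ordinal
// literal (2) in the resolved plan, as this patch does, makes repeated analysis a no-op.
```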
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/analysis/Analyzer.scala | 14 ++++++++++- .../SubstituteUnresolvedOrdinalsSuite.scala | 23 +++++++++++++++++-- .../analyzer-results/group-by-ordinal.sql.out | 2 +- 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 80cb5d8c60876..02b9c2445433b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1970,7 +1970,19 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor throw QueryCompilationErrors.groupByPositionRefersToAggregateFunctionError( index, ordinalExpr) } else { - ordinalExpr + trimAliases(ordinalExpr) match { + // HACK ALERT: If the ordinal expression is also an integer literal, don't use it + // but still keep the ordinal literal. The reason is we may repeatedly + // analyze the plan. Using a different integer literal may lead to + // a repeat GROUP BY ordinal resolution which is wrong. GROUP BY + // constant is meaningless so whatever value does not matter here. + // TODO: (SPARK-45932) GROUP BY ordinal should pull out grouping expressions to + // a Project, then the resolved ordinal expression is always + // `AttributeReference`. + case Literal(_: Int, IntegerType) => + Literal(index) + case _ => ordinalExpr + } } } else { throw QueryCompilationErrors.groupByPositionRangeError(index, aggs.size) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala index b0d7ace646e2e..953b2c8bb1011 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala @@ -17,10 +17,11 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.catalyst.analysis.TestRelations.testRelation2 +import org.apache.spark.sql.catalyst.analysis.TestRelations.{testRelation, testRelation2} import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.expressions.Literal +import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, Literal} +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.internal.SQLConf class SubstituteUnresolvedOrdinalsSuite extends AnalysisTest { @@ -67,4 +68,22 @@ class SubstituteUnresolvedOrdinalsSuite extends AnalysisTest { testRelation2.groupBy(Literal(1), Literal(2))($"a", $"b")) } } + + test("SPARK-45920: group by ordinal repeated analysis") { + val plan = testRelation.groupBy(Literal(1))(Literal(100).as("a")).analyze + comparePlans( + plan, + testRelation.groupBy(Literal(1))(Literal(100).as("a")) + ) + + val testRelationWithData = testRelation.copy(data = Seq(new GenericInternalRow(Array(1: Any)))) + // Copy the plan to reset its `analyzed` flag, so that analyzer rules will re-apply. 
+ val copiedPlan = plan.transform { + case _: LocalRelation => testRelationWithData + } + comparePlans( + copiedPlan.analyze, // repeated analysis + testRelationWithData.groupBy(Literal(1))(Literal(100).as("a")) + ) + } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out index c8c34a856d492..1bcde5bd367f7 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by-ordinal.sql.out @@ -61,7 +61,7 @@ Aggregate [a#x, a#x], [a#x, 1 AS 1#x, sum(b#x) AS sum(b)#xL] -- !query select a, 1, sum(b) from data group by 1, 2 -- !query analysis -Aggregate [a#x, 1], [a#x, 1 AS 1#x, sum(b#x) AS sum(b)#xL] +Aggregate [a#x, 2], [a#x, 1 AS 1#x, sum(b#x) AS sum(b)#xL] +- SubqueryAlias data +- View (`data`, [a#x,b#x]) +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x] From e3549b253644749a373daf11108825ad38b3f055 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 16 Nov 2023 15:36:59 -0800 Subject: [PATCH 112/521] [SPARK-45961][DOCS][3.5] Document `spark.master.*` configurations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR documents `spark.master.*` configurations. ### Why are the changes needed? Currently, `spark.master.*` configurations are undocumented. ``` $ git grep 'ConfigBuilder("spark.master' core/src/main/scala/org/apache/spark/internal/config/UI.scala: val MASTER_UI_DECOMMISSION_ALLOW_MODE = ConfigBuilder("spark.master.ui.decommission.allow.mode") core/src/main/scala/org/apache/spark/internal/config/package.scala: private[spark] val MASTER_REST_SERVER_ENABLED = ConfigBuilder("spark.master.rest.enabled") core/src/main/scala/org/apache/spark/internal/config/package.scala: private[spark] val MASTER_REST_SERVER_PORT = ConfigBuilder("spark.master.rest.port") core/src/main/scala/org/apache/spark/internal/config/package.scala: private[spark] val MASTER_UI_PORT = ConfigBuilder("spark.master.ui.port") ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual review. ![Screenshot 2023-11-16 at 2 55 09 PM](https://github.com/apache/spark/assets/9700541/da096ad6-0dec-4cda-90dd-ecf376988ac8) ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43849 from dongjoon-hyun/SPARK-45961-3.5. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/spark-standalone.md | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index ebda8d897eae9..9152547f1bec4 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -190,6 +190,41 @@ SPARK_MASTER_OPTS supports the following system properties: + + + + + + + + + + + + + + + + + + + + + + + + From 01eb6c83324be76dc30b0857aab9e126fe2ea25d Mon Sep 17 00:00:00 2001 From: Kazuyuki Tanimura Date: Fri, 17 Nov 2023 02:49:51 -0800 Subject: [PATCH 113/521] [SPARK-45786][SQL][FOLLOWUP][TEST] Fix Decimal random number tests with ANSI enabled ### What changes were proposed in this pull request? This follow-up PR fixes the test for SPARK-45786 that is failing in GHA with SPARK_ANSI_SQL_MODE=true ### Why are the changes needed? The issue discovered in https://github.com/apache/spark/pull/43678#discussion_r1395693417 ### Does this PR introduce _any_ user-facing change? 
No ### How was this patch tested? Test updated ### Was this patch authored or co-authored using generative AI tooling? No Closes #43853 from kazuyukitanimura/SPARK-45786-FollowUp. Authored-by: Kazuyuki Tanimura Signed-off-by: Dongjoon Hyun (cherry picked from commit 949de3416a8ef5b7faa22149f5e07d8235237f40) Signed-off-by: Dongjoon Hyun --- .../expressions/ArithmeticExpressionSuite.scala | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index 568dcd10d1166..2dc7e82f77226 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -308,27 +308,35 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper val mulResult = Decimal(mulExact.setScale(mulType.scale, RoundingMode.HALF_UP)) val mulExpected = if (mulResult.precision > DecimalType.MAX_PRECISION) null else mulResult - checkEvaluation(mulActual, mulExpected) + checkEvaluationOrException(mulActual, mulExpected) val divType = Divide(null, null).resultDecimalType(p1, s1, p2, s2) val divResult = Decimal(divExact.setScale(divType.scale, RoundingMode.HALF_UP)) val divExpected = if (divResult.precision > DecimalType.MAX_PRECISION) null else divResult - checkEvaluation(divActual, divExpected) + checkEvaluationOrException(divActual, divExpected) val remType = Remainder(null, null).resultDecimalType(p1, s1, p2, s2) val remResult = Decimal(remExact.setScale(remType.scale, RoundingMode.HALF_UP)) val remExpected = if (remResult.precision > DecimalType.MAX_PRECISION) null else remResult - checkEvaluation(remActual, remExpected) + checkEvaluationOrException(remActual, remExpected) val quotType = IntegralDivide(null, null).resultDecimalType(p1, s1, p2, s2) val quotResult = Decimal(quotExact.setScale(quotType.scale, RoundingMode.HALF_UP)) val quotExpected = if (quotResult.precision > DecimalType.MAX_PRECISION) null else quotResult - checkEvaluation(quotActual, quotExpected.toLong) + checkEvaluationOrException(quotActual, quotExpected.toLong) } } + + def checkEvaluationOrException(actual: BinaryArithmetic, expected: Any): Unit = + if (SQLConf.get.ansiEnabled && expected == null) { + checkExceptionInExpression[SparkArithmeticException](actual, + "NUMERIC_VALUE_OUT_OF_RANGE") + } else { + checkEvaluation(actual, expected) + } } } From 9e492b71c4aaa070bc36bfae120e1c6ca05e4a7a Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 17 Nov 2023 13:04:19 -0800 Subject: [PATCH 114/521] [SPARK-45963][SQL][DOCS][3.5] Restore documentation for DSv2 API This PR cherry-picks https://github.com/apache/spark/pull/43855 to branch-3.5 --- ### What changes were proposed in this pull request? This PR restores the DSv2 documentation. https://github.com/apache/spark/pull/38392 mistakenly added `org/apache/spark/sql/connect` as a private that includes `org/apache/spark/sql/connector`. ### Why are the changes needed? For end users to read DSv2 documentation. ### Does this PR introduce _any_ user-facing change? Yes, it restores the DSv2 API documentation that used to be there https://spark.apache.org/docs/3.3.0/api/scala/org/apache/spark/sql/connector/catalog/index.html ### How was this patch tested? 
Manually tested via: ``` SKIP_PYTHONDOC=1 SKIP_RDOC=1 SKIP_SQLDOC=1 bundle exec jekyll build ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43865 from HyukjinKwon/SPARK-45963-3.5. Authored-by: Hyukjin Kwon Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 2 +- .../spark/sql/connector/catalog/SupportsMetadataColumns.java | 4 ++-- .../spark/sql/connector/catalog/InMemoryBaseTable.scala | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 718f2bb28cec4..60d52368de458 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1401,7 +1401,7 @@ object Unidoc { .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/util/io"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/util/kvstore"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/catalyst"))) - .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/connect"))) + .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/connect/"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/execution"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/internal"))) .map(_.filterNot(_.getCanonicalPath.contains("org/apache/spark/sql/hive"))) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java index 894184dbcc82d..e42424268b44d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/catalog/SupportsMetadataColumns.java @@ -58,8 +58,8 @@ public interface SupportsMetadataColumns extends Table { * Determines how this data source handles name conflicts between metadata and data columns. *

* If true, spark will automatically rename the metadata column to resolve the conflict. End users - * can reliably select metadata columns (renamed or not) with {@link Dataset.metadataColumn}, and - * internal code can use {@link MetadataAttributeWithLogicalName} to extract the logical name from + * can reliably select metadata columns (renamed or not) with {@code Dataset.metadataColumn}, and + * internal code can use {@code MetadataAttributeWithLogicalName} to extract the logical name from * a metadata attribute. *

* If false, the data column will hide the metadata column. It is recommended that Table diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala index a0a4d8bdee9f5..a309db341d8e6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/connector/catalog/InMemoryBaseTable.scala @@ -619,9 +619,9 @@ class BufferedRows(val key: Seq[Any] = Seq.empty) extends WriterCommitMessage } /** - * Theoretically, [[InternalRow]] returned by [[HasPartitionKey#partitionKey()]] + * Theoretically, `InternalRow` returned by `HasPartitionKey#partitionKey()` * does not need to implement equal and hashcode methods. - * But [[GenericInternalRow]] implements equals and hashcode methods already. Here we override it + * But `GenericInternalRow` implements equals and hashcode methods already. Here we override it * to simulate that it has not been implemented to verify codes correctness. */ case class PartitionInternalRow(keys: Array[Any]) From f3baf086acdf166445aef81181d13d4884d44e92 Mon Sep 17 00:00:00 2001 From: Deepayan Patra Date: Fri, 17 Nov 2023 13:17:43 -0800 Subject: [PATCH 115/521] [SPARK-43393][SQL][3.5] Address sequence expression overflow bug ### What changes were proposed in this pull request? Spark has a (long-standing) overflow bug in the `sequence` expression. Consider the following operations: ``` spark.sql("CREATE TABLE foo (l LONG);") spark.sql(s"INSERT INTO foo VALUES (${Long.MaxValue});") spark.sql("SELECT sequence(0, l) FROM foo;").collect() ``` The result of these operations will be: ``` Array[org.apache.spark.sql.Row] = Array([WrappedArray()]) ``` an unintended consequence of overflow. The sequence is applied to values `0` and `Long.MaxValue` with a step size of `1` which uses a length computation defined [here](https://github.com/apache/spark/blob/16411188c7ba6cb19c46a2bd512b2485a4c03e2c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L3451). In this calculation, with `start = 0`, `stop = Long.MaxValue`, and `step = 1`, the calculated `len` overflows to `Long.MinValue`. The computation, in binary looks like: ``` 0111111111111111111111111111111111111111111111111111111111111111 - 0000000000000000000000000000000000000000000000000000000000000000 ------------------------------------------------------------------ 0111111111111111111111111111111111111111111111111111111111111111 / 0000000000000000000000000000000000000000000000000000000000000001 ------------------------------------------------------------------ 0111111111111111111111111111111111111111111111111111111111111111 + 0000000000000000000000000000000000000000000000000000000000000001 ------------------------------------------------------------------ 1000000000000000000000000000000000000000000000000000000000000000 ``` The following [check](https://github.com/apache/spark/blob/16411188c7ba6cb19c46a2bd512b2485a4c03e2c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L3454) passes as the negative `Long.MinValue` is still `<= MAX_ROUNDED_ARRAY_LENGTH`. 
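The same computation in a short, self-contained Scala sketch (an illustration of the description above, not code from the patch):

```scala
// start = 0, stop = Long.MaxValue, step = 1
val len = 1L + (Long.MaxValue - 0L) / 1L  // Long addition wraps to Long.MinValue
val passes = len <= 2147483632L           // MAX_ROUNDED_ARRAY_LENGTH; true, since len is negative
val truncated = len.toInt                 // 0 -- the "empty" sequence length
```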
The following cast to `toInt` uses this representation and [truncates the upper bits](https://github.com/apache/spark/blob/16411188c7ba6cb19c46a2bd512b2485a4c03e2c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L3457) resulting in an empty length of `0`. Other overflows are similarly problematic. This PR addresses the issue by checking numeric operations in the length computation for overflow. ### Why are the changes needed? There is a correctness bug from overflow in the `sequence` expression. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Tests added in `CollectionExpressionsSuite.scala`. Closes #43820 from thepinetree/spark-sequence-overflow-3.5. Authored-by: Deepayan Patra Signed-off-by: Dongjoon Hyun --- .../expressions/collectionOperations.scala | 47 +++++++++++++------ .../CollectionExpressionsSuite.scala | 44 +++++++++++++++-- 2 files changed, 71 insertions(+), 20 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index ade4a6c5be722..3ddbe38fdedfb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -22,6 +22,7 @@ import java.util.Comparator import scala.collection.mutable import scala.reflect.ClassTag +import org.apache.spark.SparkException.internalError import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.{TypeCheckResult, TypeCoercion, UnresolvedAttribute, UnresolvedSeed} import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch @@ -40,7 +41,6 @@ import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SQLOpenHashSet import org.apache.spark.unsafe.UTF8StringBuilder import org.apache.spark.unsafe.array.ByteArrayMethods -import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH import org.apache.spark.unsafe.types.{ByteArray, CalendarInterval, UTF8String} /** @@ -3080,6 +3080,34 @@ case class Sequence( } object Sequence { + private def prettyName: String = "sequence" + + def sequenceLength(start: Long, stop: Long, step: Long): Int = { + try { + val delta = Math.subtractExact(stop, start) + if (delta == Long.MinValue && step == -1L) { + // We must special-case division of Long.MinValue by -1 to catch potential unchecked + // overflow in next operation. Division does not have a builtin overflow check. We + // previously special-case div-by-zero. + throw new ArithmeticException("Long overflow (Long.MinValue / -1)") + } + val len = if (stop == start) 1L else Math.addExact(1L, (delta / step)) + if (len > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { + throw QueryExecutionErrors.createArrayWithElementsExceedLimitError(len) + } + len.toInt + } catch { + // We handle overflows in the previous try block by raising an appropriate exception. 
+ case _: ArithmeticException => + val safeLen = + BigInt(1) + (BigInt(stop) - BigInt(start)) / BigInt(step) + if (safeLen > ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { + throw QueryExecutionErrors.createArrayWithElementsExceedLimitError(safeLen) + } + throw internalError("Unreachable code reached.") + case e: Exception => throw e + } + } private type LessThanOrEqualFn = (Any, Any) => Boolean @@ -3451,13 +3479,7 @@ object Sequence { || (estimatedStep == num.zero && start == stop), s"Illegal sequence boundaries: $start to $stop by $step") - val len = if (start == stop) 1L else 1L + (stop.toLong - start.toLong) / estimatedStep.toLong - - require( - len <= MAX_ROUNDED_ARRAY_LENGTH, - s"Too long sequence: $len. Should be <= $MAX_ROUNDED_ARRAY_LENGTH") - - len.toInt + sequenceLength(start.toLong, stop.toLong, estimatedStep.toLong) } private def genSequenceLengthCode( @@ -3467,7 +3489,7 @@ object Sequence { step: String, estimatedStep: String, len: String): String = { - val longLen = ctx.freshName("longLen") + val calcFn = classOf[Sequence].getName + ".sequenceLength" s""" |if (!(($estimatedStep > 0 && $start <= $stop) || | ($estimatedStep < 0 && $start >= $stop) || @@ -3475,12 +3497,7 @@ object Sequence { | throw new IllegalArgumentException( | "Illegal sequence boundaries: " + $start + " to " + $stop + " by " + $step); |} - |long $longLen = $stop == $start ? 1L : 1L + ((long) $stop - $start) / $estimatedStep; - |if ($longLen > $MAX_ROUNDED_ARRAY_LENGTH) { - | throw new IllegalArgumentException( - | "Too long sequence: " + $longLen + ". Should be <= $MAX_ROUNDED_ARRAY_LENGTH"); - |} - |int $len = (int) $longLen; + |int $len = $calcFn((long) $start, (long) $stop, (long) $estimatedStep); """.stripMargin } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala index 1787f6ac72dd4..99eece31a1efc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.{outstandingZoneIds, import org.apache.spark.sql.catalyst.util.IntervalUtils._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ -import org.apache.spark.unsafe.array.ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH +import org.apache.spark.unsafe.array.ByteArrayMethods import org.apache.spark.unsafe.types.UTF8String class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -769,10 +769,6 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper // test sequence boundaries checking - checkExceptionInExpression[IllegalArgumentException]( - new Sequence(Literal(Int.MinValue), Literal(Int.MaxValue), Literal(1)), - EmptyRow, s"Too long sequence: 4294967296. 
Should be <= $MAX_ROUNDED_ARRAY_LENGTH") - checkExceptionInExpression[IllegalArgumentException]( new Sequence(Literal(1), Literal(2), Literal(0)), EmptyRow, "boundaries: 1 to 2 by 0") checkExceptionInExpression[IllegalArgumentException]( @@ -782,6 +778,44 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper checkExceptionInExpression[IllegalArgumentException]( new Sequence(Literal(1), Literal(2), Literal(-1)), EmptyRow, "boundaries: 1 to 2 by -1") + // SPARK-43393: test Sequence overflow checking + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(Int.MinValue), Literal(Int.MaxValue), Literal(1)), + errorClass = "_LEGACY_ERROR_TEMP_2161", + parameters = Map( + "count" -> (BigInt(Int.MaxValue) - BigInt { Int.MinValue } + 1).toString, + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString())) + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(0L), Literal(Long.MaxValue), Literal(1L)), + errorClass = "_LEGACY_ERROR_TEMP_2161", + parameters = Map( + "count" -> (BigInt(Long.MaxValue) + 1).toString, + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString())) + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(0L), Literal(Long.MinValue), Literal(-1L)), + errorClass = "_LEGACY_ERROR_TEMP_2161", + parameters = Map( + "count" -> ((0 - BigInt(Long.MinValue)) + 1).toString(), + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString())) + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(Long.MinValue), Literal(Long.MaxValue), Literal(1L)), + errorClass = "_LEGACY_ERROR_TEMP_2161", + parameters = Map( + "count" -> (BigInt(Long.MaxValue) - BigInt { Long.MinValue } + 1).toString, + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString())) + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(Long.MaxValue), Literal(Long.MinValue), Literal(-1L)), + errorClass = "_LEGACY_ERROR_TEMP_2161", + parameters = Map( + "count" -> (BigInt(Long.MaxValue) - BigInt { Long.MinValue } + 1).toString, + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString())) + checkErrorInExpression[SparkRuntimeException]( + new Sequence(Literal(Long.MaxValue), Literal(-1L), Literal(-1L)), + errorClass = "_LEGACY_ERROR_TEMP_2161", + parameters = Map( + "count" -> (BigInt(Long.MaxValue) - BigInt { -1L } + 1).toString, + "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString())) + // test sequence with one element (zero step or equal start and stop) checkEvaluation(new Sequence(Literal(1), Literal(1), Literal(-1)), Seq(1)) From 96bfd8370c27baf5283646f2f93cb66ab70de844 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 20 Nov 2023 17:50:04 -0800 Subject: [PATCH 116/521] [SPARK-46012][CORE] EventLogFileReader should not read rolling logs if app status file is missing ### What changes were proposed in this pull request? This PR aims to prevent `EventLogFileReader` from reading rolling event logs if `appStatus` is missing. ### Why are the changes needed? Since Apache Spark 3.0.0, `appstatus_` is supposed to exist. https://github.com/apache/spark/blob/839f0c98bd85a14eadad13f8aaac876275ded5a4/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileWriters.scala#L277-L283 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. 
Closes #43914 from dongjoon-hyun/SPARK-46012. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 6ca1c67de082269b9337503bff5161f5a2d87225) Signed-off-by: Dongjoon Hyun --- .../deploy/history/EventLogFileReaders.scala | 3 +- .../history/EventLogFileReadersSuite.scala | 31 +++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala index b21c67a2823af..714987a8eb873 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala @@ -119,7 +119,8 @@ object EventLogFileReader extends Logging { if (isSingleEventLog(status)) { Some(new SingleFileEventLogFileReader(fs, status.getPath, Option(status))) } else if (isRollingEventLogs(status)) { - if (fs.listStatus(status.getPath).exists(RollingEventLogFilesWriter.isEventLogFile)) { + if (fs.listStatus(status.getPath).exists(RollingEventLogFilesWriter.isEventLogFile) && + fs.listStatus(status.getPath).exists(RollingEventLogFilesWriter.isAppStatusFile)) { Some(new RollingEventLogFilesFileReader(fs, status.getPath)) } else { logDebug(s"Rolling event log directory have no event log file at ${status.getPath}") diff --git a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileReadersSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileReadersSuite.scala index efb8393403043..f34f792881f90 100644 --- a/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileReadersSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/history/EventLogFileReadersSuite.scala @@ -229,6 +229,37 @@ class SingleFileEventLogFileReaderSuite extends EventLogFileReadersSuite { } class RollingEventLogFilesReaderSuite extends EventLogFileReadersSuite { + test("SPARK-46012: appStatus file should exist") { + withTempDir { dir => + val appId = getUniqueApplicationId + val attemptId = None + + val conf = getLoggingConf(testDirPath) + conf.set(EVENT_LOG_ENABLE_ROLLING, true) + conf.set(EVENT_LOG_ROLLING_MAX_FILE_SIZE.key, "10m") + + val writer = createWriter(appId, attemptId, testDirPath.toUri, conf, + SparkHadoopUtil.get.newConfiguration(conf)) + + writer.start() + val dummyStr = "dummy" * 1024 + writeTestEvents(writer, dummyStr, 1024 * 1024 * 20) + writer.stop() + + // Verify a healthy rolling event log directory + val logPathCompleted = getCurrentLogPath(writer.logPath, isCompleted = true) + val readerOpt = EventLogFileReader(fileSystem, new Path(logPathCompleted)) + assert(readerOpt.get.isInstanceOf[RollingEventLogFilesFileReader]) + assert(readerOpt.get.listEventLogFiles.length === 3) + + // Make unhealthy rolling event directory by removing appStatus file. 
+ val appStatusFile = fileSystem.listStatus(new Path(logPathCompleted)) + .find(RollingEventLogFilesWriter.isAppStatusFile).get.getPath + fileSystem.delete(appStatusFile, false) + assert(EventLogFileReader(fileSystem, new Path(logPathCompleted)).isEmpty) + } + } + allCodecs.foreach { codecShortName => test(s"rolling event log files - codec $codecShortName") { val appId = getUniqueApplicationId From 24079adc8257871e88879796333b8f44633995b8 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 20 Nov 2023 17:54:41 -0800 Subject: [PATCH 117/521] [SPARK-46014][SQL][TESTS] Run `RocksDBStateStoreStreamingAggregationSuite` on a dedicated JVM ### What changes were proposed in this pull request? This PR aims to run `RocksDBStateStoreStreamingAggregationSuite` on a dedicated JVM to reduce the flakiness. ### Why are the changes needed? `RocksDBStateStoreStreamingAggregationSuite` is flaky. - https://github.com/apache/spark/actions/runs/6936862847/job/18869845206 - https://github.com/apache/spark/actions/runs/6926542106/job/18838877151 - https://github.com/apache/spark/actions/runs/6924927427/job/18834849433 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43916 from dongjoon-hyun/SPARK-46014. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 9bb1fe2a8410e6a0dbf73a420d8e9b359363b932) Signed-off-by: Dongjoon Hyun --- project/SparkBuild.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 60d52368de458..75a0650b5fac2 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -580,6 +580,7 @@ object SparkParallelTestGrouping { "org.apache.spark.sql.hive.thriftserver.ui.ThriftServerPageSuite", "org.apache.spark.sql.hive.thriftserver.ui.HiveThriftServer2ListenerSuite", "org.apache.spark.sql.kafka010.KafkaDelegationTokenSuite", + "org.apache.spark.sql.streaming.RocksDBStateStoreStreamingAggregationSuite", "org.apache.spark.shuffle.KubernetesLocalDiskShuffleDataIOSuite", "org.apache.spark.sql.hive.HiveScalaReflectionSuite" ) From a436736c02a689e4536acea16af6caea7be67fa7 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Tue, 21 Nov 2023 14:38:24 +0900 Subject: [PATCH 118/521] [MINOR][DOCS] Correct Python Spark Connect documentation about pip installation ### What changes were proposed in this pull request? This PR fixes the Spark Connect documentation from `pyspark==3.5.0` to `pyspark[connect]==3.5.0`; otherwise it will fail to execute the example as is because of missing dependencies. This is sort of a followup of SPARK-44867. https://github.com/apache/spark/blob/d31c8596cd714766892d1395e30358bd1cd3cb84/python/setup.py#L325-L332 ### Why are the changes needed? To guide users about using Spark Connect ### Does this PR introduce _any_ user-facing change? Yes, this fixes the user-facing documentation for Python Spark Connect. ### How was this patch tested? Manually checked with Markdown editor. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43919 from HyukjinKwon/SPARK-44867-followup. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit df1280cb10ee71ea362a95705f355402e2bcaff2) Signed-off-by: Hyukjin Kwon --- docs/spark-connect-overview.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/spark-connect-overview.md b/docs/spark-connect-overview.md index 0673763f03bcc..9da559c37fdef 100644 --- a/docs/spark-connect-overview.md +++ b/docs/spark-connect-overview.md @@ -279,11 +279,11 @@ The connection may also be programmatically created using _SparkSession#builder_

-First, install PySpark with `pip install pyspark==3.5.0` or if building a packaged PySpark application/library, +First, install PySpark with `pip install pyspark[connect]==3.5.0` or if building a packaged PySpark application/library, add it your setup.py file as: {% highlight python %} install_requires=[ -'pyspark==3.5.0' +'pyspark[connect]==3.5.0' ] {% endhighlight %} From ece4ebe575798bf92d8e6c2c454c62e6cbfecf01 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 21 Nov 2023 10:17:30 -0800 Subject: [PATCH 119/521] [SPARK-46033][SQL][TESTS] Fix flaky ArithmeticExpressionSuite ### What changes were proposed in this pull request? The pr aims to fix flaky ArithmeticExpressionSuite. https://github.com/panbingkun/spark/actions/runs/6940660146/job/18879997046 image ### Why are the changes needed? Fix bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Manually test. - Pass GA. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43935 from panbingkun/SPARK-46033. Authored-by: panbingkun Signed-off-by: Dongjoon Hyun (cherry picked from commit b7930e718f453f8a9d923ad57161a982f16ca8e8) Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/expressions/ArithmeticExpressionSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala index 2dc7e82f77226..7a80188d445de 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ArithmeticExpressionSuite.scala @@ -326,7 +326,8 @@ class ArithmeticExpressionSuite extends SparkFunSuite with ExpressionEvalHelper val quotResult = Decimal(quotExact.setScale(quotType.scale, RoundingMode.HALF_UP)) val quotExpected = if (quotResult.precision > DecimalType.MAX_PRECISION) null else quotResult - checkEvaluationOrException(quotActual, quotExpected.toLong) + checkEvaluationOrException(quotActual, + if (quotExpected == null) null else quotExpected.toLong) } } From fcf55737490a5d6a1b491b4be4c9924492b8e740 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Tue, 21 Nov 2023 10:26:36 -0800 Subject: [PATCH 120/521] [SPARK-46019][SQL][TESTS] Fix `HiveThriftServer2ListenerSuite` and `ThriftServerPageSuite` to create `java.io.tmpdir` if it doesn't exist ### What changes were proposed in this pull request? The pr aims to fix `HiveThriftServer2ListenerSuite` and `ThriftServerPageSuite` failed when there are running on local. 
``` [info] ThriftServerPageSuite: [info] - thriftserver page should load successfully *** FAILED *** (515 milliseconds) [info] java.lang.IllegalStateException: Could not initialize plugin: interface org.mockito.plugins.MockMaker (alternate: null) [info] at org.mockito.internal.configuration.plugins.PluginLoader$1.invoke(PluginLoader.java:84) [info] at jdk.proxy2/jdk.proxy2.$Proxy20.isTypeMockable(Unknown Source) [info] at org.mockito.internal.util.MockUtil.typeMockabilityOf(MockUtil.java:78) [info] at org.mockito.internal.util.MockCreationValidator.validateType(MockCreationValidator.java:22) [info] at org.mockito.internal.creation.MockSettingsImpl.validatedSettings(MockSettingsImpl.java:267) [info] at org.mockito.internal.creation.MockSettingsImpl.build(MockSettingsImpl.java:234) [info] at org.mockito.internal.MockitoCore.mock(MockitoCore.java:86) [info] at org.mockito.Mockito.mock(Mockito.java:2037) [info] at org.mockito.Mockito.mock(Mockito.java:2010) [info] at org.apache.spark.sql.hive.thriftserver.ui.ThriftServerPageSuite.getStatusStore(ThriftServerPageSuite.scala:49) ``` It can be simply reproduced by running the following command: ``` build/sbt "hive-thriftserver/testOnly org.apache.spark.sql.hive.thriftserver.ui.HiveThriftServer2ListenerSuite" -Phive-thriftserver build/sbt "hive-thriftserver/testOnly org.apache.spark.sql.hive.thriftserver.ui.ThriftServerPageSuite" -Phive-thriftserver ``` ### Why are the changes needed? Fix tests failed. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually test: ``` build/sbt "hive-thriftserver/testOnly org.apache.spark.sql.hive.thriftserver.ui.HiveThriftServer2ListenerSuite" -Phive-thriftserver build/sbt "hive-thriftserver/testOnly org.apache.spark.sql.hive.thriftserver.ui.ThriftServerPageSuite" -Phive-thriftserver ``` After it: ``` [info] - listener events should store successfully (live = true) (1 second, 711 milliseconds) [info] - listener events should store successfully (live = false) (6 milliseconds) [info] - cleanup session if exceeds the threshold (live = true) (21 milliseconds) [info] - cleanup session if exceeds the threshold (live = false) (3 milliseconds) [info] - update execution info when jobstart event come after execution end event (9 milliseconds) [info] - SPARK-31387 - listener update methods should not throw exception with unknown input (8 milliseconds) [info] Run completed in 3 seconds, 734 milliseconds. [info] Total number of tests run: 6 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 6, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. [success] Total time: 156 s (02:36), completed Nov 21, 2023, 1:57:21 PM ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43921 from panbingkun/SPARK-46019. 
Authored-by: panbingkun Signed-off-by: Dongjoon Hyun (cherry picked from commit fdcd20f4b51c3ddddaae12f7d3f429e7b77c9f5e) Signed-off-by: Dongjoon Hyun --- .../ui/HiveThriftServer2ListenerSuite.scala | 10 ++++++++++ .../hive/thriftserver/ui/ThriftServerPageSuite.scala | 10 ++++++++++ 2 files changed, 20 insertions(+) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala index f5167a4ea7377..62d97772bcbc1 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/HiveThriftServer2ListenerSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hive.thriftserver.ui +import java.io.File import java.util.Properties import org.mockito.Mockito.{mock, RETURNS_SMART_NULLS} @@ -34,6 +35,15 @@ class HiveThriftServer2ListenerSuite extends SparkFunSuite with BeforeAndAfter { private var kvstore: ElementTrackingStore = _ + protected override def beforeAll(): Unit = { + val tmpDirName = System.getProperty("java.io.tmpdir") + val tmpDir = new File(tmpDirName) + if (!tmpDir.exists()) { + tmpDir.mkdirs() + } + super.beforeAll() + } + after { if (kvstore != null) { kvstore.close() diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPageSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPageSuite.scala index d7e1852199639..1245e6740ebbe 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPageSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPageSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hive.thriftserver.ui +import java.io.File import java.util.{Calendar, Locale} import javax.servlet.http.HttpServletRequest @@ -34,6 +35,15 @@ class ThriftServerPageSuite extends SparkFunSuite with BeforeAndAfter { private var kvstore: ElementTrackingStore = _ + protected override def beforeAll(): Unit = { + val tmpDirName = System.getProperty("java.io.tmpdir") + val tmpDir = new File(tmpDirName) + if (!tmpDir.exists()) { + tmpDir.mkdirs() + } + super.beforeAll() + } + after { if (kvstore != null) { kvstore.close() From 8f52fd55d42045d4aadb2cb18c7c3f99ad75eb35 Mon Sep 17 00:00:00 2001 From: Mark Jarvin Date: Tue, 21 Nov 2023 11:38:31 -0800 Subject: [PATCH 121/521] [SPARK-44973][SQL] Fix `ArrayIndexOutOfBoundsException` in `conv()` ### What changes were proposed in this pull request? Increase the size of the buffer allocated for the result of base conversion in `NumberConverter` to prevent ArrayIndexOutOfBoundsException when evaluating `conv(s"${Long.MinValue}", 10, -2)`. ### Why are the changes needed? I don't think the ArrayIndexOutOfBoundsException is intended behaviour. ### Does this PR introduce _any_ user-facing change? Users will no longer experience an ArrayIndexOutOfBoundsException for this specific set of arguments and will instead receive the expected base conversion. ### How was this patch tested? New unit test cases ### Was this patch authored or co-authored using generative AI tooling? No Closes #43880 from markj-db/SPARK-44973. 
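A quick Scala check of the sizing argument (illustrative, not part of the patch):

```scala
// conv(Long.MinValue, 10, -2) must emit a '-' sign plus 64 binary digits, i.e. 65
// characters, one more than the old max(n.length, 64) buffer allowed here, since
// the decimal input "-9223372036854775808" is only 20 characters long.
val expected = BigInt(Long.MinValue).toString(2)  // "-1000...0" (63 trailing zeros)
val width = expected.length                       // 65
```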
Authored-by: Mark Jarvin Signed-off-by: Dongjoon Hyun (cherry picked from commit 2ac8ff76a5169fe1f6cf130cc82738ba78bd8c65) Signed-off-by: Dongjoon Hyun --- .../spark/sql/catalyst/util/NumberConverter.scala | 9 ++++++++- .../sql/catalyst/util/NumberConverterSuite.scala | 6 ++++++ .../org/apache/spark/sql/MathFunctionsSuite.scala | 11 +++++++++++ 3 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala index 59765cde1f926..06d3910311b1c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala @@ -23,6 +23,13 @@ import org.apache.spark.unsafe.types.UTF8String object NumberConverter { + /** + * The output string has a max length of one char per bit in the 64-bit `Long` intermediate + * representation plus one char for the '-' sign. This happens in practice when converting + * `Long.MinValue` with `toBase` equal to -2. + */ + private final val MAX_OUTPUT_LENGTH = java.lang.Long.SIZE + 1 + /** * Decode v into value[]. * @@ -148,7 +155,7 @@ object NumberConverter { var (negative, first) = if (n(0) == '-') (true, 1) else (false, 0) // Copy the digits in the right side of the array - val temp = new Array[Byte](Math.max(n.length, 64)) + val temp = new Array[Byte](Math.max(n.length, MAX_OUTPUT_LENGTH)) var v: Long = -1 System.arraycopy(n, first, temp, temp.length - n.length + first, n.length - first) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala index c634c5b739b8f..3de331f90a6d3 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/NumberConverterSuite.scala @@ -55,6 +55,12 @@ class NumberConverterSuite extends SparkFunSuite { checkConv("-10", 11, 7, "45012021522523134134555") } + test("SPARK-44973: conv must allocate enough space for all digits plus negative sign") { + checkConv(s"${Long.MinValue}", 10, -2, BigInt(Long.MinValue).toString(2)) + checkConv((BigInt(Long.MaxValue) + 1).toString(16), 16, -2, BigInt(Long.MinValue).toString(2)) + checkConv(BigInt(Long.MinValue).toString(16), 16, -2, BigInt(Long.MinValue).toString(2)) + } + test("byte to binary") { checkToBinary(0.toByte) checkToBinary(1.toByte) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala index 0adb89c3a9eaf..ba04e3b691a1b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/MathFunctionsSuite.scala @@ -262,6 +262,17 @@ class MathFunctionsSuite extends QueryTest with SharedSparkSession { } } + test("SPARK-44973: conv must allocate enough space for all digits plus negative sign") { + withSQLConf(SQLConf.ANSI_ENABLED.key -> false.toString) { + val df = Seq( + ((BigInt(Long.MaxValue) + 1).toString(16)), + (BigInt(Long.MinValue).toString(16)) + ).toDF("num") + checkAnswer(df.select(conv($"num", 16, -2)), + Seq(Row(BigInt(Long.MinValue).toString(2)), Row(BigInt(Long.MinValue).toString(2)))) + } + } + test("floor") { testOneToOneMathFunction(floor, (d: Double) => math.floor(d).toLong) // 
testOneToOneMathFunction does not validate the resulting data type From 20657549acff80769af889eb3f0599df06956d3c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 21 Nov 2023 17:51:10 -0800 Subject: [PATCH 122/521] [SPARK-46012][CORE][FOLLOWUP] Invoke `fs.listStatus` once and reuse the result ### What changes were proposed in this pull request? This PR is a follow-up of #43914 and aims to invoke `fs.listStatus` once and reuse the result. ### Why are the changes needed? This will prevent the increase of the number of `listStatus` invocation . ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs with the existing test case. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43944 from dongjoon-hyun/SPARK-46012-2. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 6be4a0358265fb81f68a27589f9940bd726c8ee7) Signed-off-by: Dongjoon Hyun --- .../apache/spark/deploy/history/EventLogFileReaders.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala index 714987a8eb873..8c3dda4727784 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/EventLogFileReaders.scala @@ -119,8 +119,9 @@ object EventLogFileReader extends Logging { if (isSingleEventLog(status)) { Some(new SingleFileEventLogFileReader(fs, status.getPath, Option(status))) } else if (isRollingEventLogs(status)) { - if (fs.listStatus(status.getPath).exists(RollingEventLogFilesWriter.isEventLogFile) && - fs.listStatus(status.getPath).exists(RollingEventLogFilesWriter.isAppStatusFile)) { + val files = fs.listStatus(status.getPath) + if (files.exists(RollingEventLogFilesWriter.isEventLogFile) && + files.exists(RollingEventLogFilesWriter.isAppStatusFile)) { Some(new RollingEventLogFilesFileReader(fs, status.getPath)) } else { logDebug(s"Rolling event log directory have no event log file at ${status.getPath}") From 1f81e26e03803238ee6292762bcbee49e1a7c066 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 22 Nov 2023 16:50:21 +0800 Subject: [PATCH 123/521] [SPARK-46006][YARN] YarnAllocator miss clean targetNumExecutorsPerResourceProfileId after YarnSchedulerBackend call stop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? We meet a case that user call sc.stop() after run all custom code, but stuck in some place. Cause below situation 1. User call sc.stop() 2. sc.stop() stuck in some process, but SchedulerBackend.stop was called 3. Since yarn ApplicationMaster didn't finish, still call YarnAllocator.allocateResources() 4. Since driver endpoint stop new allocated executor failed to register 5. untll trigger Max number of executor failures 6. Caused by Before call CoarseGrainedSchedulerBackend.stop() will call YarnSchedulerBackend.requestTotalExecutor() to clean request info ![image](https://github.com/apache/spark/assets/46485123/4a61fb40-5986-4ecc-9329-369187d5311d) When YarnAllocator handle then empty resource request, since resourceTotalExecutorsWithPreferedLocalities is empty, miss clean targetNumExecutorsPerResourceProfileId. ![image](https://github.com/apache/spark/assets/46485123/0133f606-e1d7-4db7-95fe-140c61379102) ### Why are the changes needed? 
Fix bug ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No ### Was this patch authored or co-authored using generative AI tooling? No Closes #43906 from AngersZhuuuu/SPARK-46006. Authored-by: Angerszhuuuu Signed-off-by: Kent Yao (cherry picked from commit 06635e25f170e61f6cfe53232d001993ec7d376d) Signed-off-by: Kent Yao --- .../spark/deploy/yarn/YarnAllocator.scala | 28 +++++++++++-------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 19c06f957318b..f14fc9d5de461 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -384,19 +384,25 @@ private[yarn] class YarnAllocator( this.numLocalityAwareTasksPerResourceProfileId = numLocalityAwareTasksPerResourceProfileId this.hostToLocalTaskCountPerResourceProfileId = hostToLocalTaskCountPerResourceProfileId - val res = resourceProfileToTotalExecs.map { case (rp, numExecs) => - createYarnResourceForResourceProfile(rp) - if (numExecs != getOrUpdateTargetNumExecutorsForRPId(rp.id)) { - logInfo(s"Driver requested a total number of $numExecs executor(s) " + - s"for resource profile id: ${rp.id}.") - targetNumExecutorsPerResourceProfileId(rp.id) = numExecs - allocatorNodeHealthTracker.setSchedulerExcludedNodes(excludedNodes) - true - } else { - false + if (resourceProfileToTotalExecs.isEmpty) { + targetNumExecutorsPerResourceProfileId.clear() + allocatorNodeHealthTracker.setSchedulerExcludedNodes(excludedNodes) + true + } else { + val res = resourceProfileToTotalExecs.map { case (rp, numExecs) => + createYarnResourceForResourceProfile(rp) + if (numExecs != getOrUpdateTargetNumExecutorsForRPId(rp.id)) { + logInfo(s"Driver requested a total number of $numExecs executor(s) " + + s"for resource profile id: ${rp.id}.") + targetNumExecutorsPerResourceProfileId(rp.id) = numExecs + allocatorNodeHealthTracker.setSchedulerExcludedNodes(excludedNodes) + true + } else { + false + } } + res.exists(_ == true) } - res.exists(_ == true) } /** From 18bcd020118a8efb49c03546ec501be6f0fc0852 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 21 Nov 2023 16:07:50 +0900 Subject: [PATCH 124/521] [MINOR][BUILD] Rename gprcVersion to grpcVersion in SparkBuild ### What changes were proposed in this pull request? This PR aims to fix a typo. ``` - val gprcVersion = "1.56.0" + val grpcVersion = "1.56.0" ``` There are two occurrences. ``` $ git grep gprc project/SparkBuild.scala: val gprcVersion = "1.56.0" project/SparkBuild.scala: "io.grpc" % "protoc-gen-grpc-java" % BuildCommons.gprcVersion asProtocPlugin(), ``` ### Why are the changes needed? To fix a typo. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43923 from dongjoon-hyun/minor_grpc. Authored-by: Dongjoon Hyun Signed-off-by: Hyukjin Kwon --- project/SparkBuild.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 75a0650b5fac2..79b58deafde57 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -91,7 +91,7 @@ object BuildCommons { // SPARK-41247: needs to be consistent with `protobuf.version` in `pom.xml`. 
val protoVersion = "3.23.4" // GRPC version used for Spark Connect. - val gprcVersion = "1.56.0" + val grpcVersion = "1.56.0" } object SparkBuild extends PomBuild { @@ -694,7 +694,7 @@ object SparkConnectCommon { SbtPomKeys.effectivePom.value.getProperties.get( "guava.failureaccess.version").asInstanceOf[String] Seq( - "io.grpc" % "protoc-gen-grpc-java" % BuildCommons.gprcVersion asProtocPlugin(), + "io.grpc" % "protoc-gen-grpc-java" % BuildCommons.grpcVersion asProtocPlugin(), "com.google.guava" % "guava" % guavaVersion, "com.google.guava" % "failureaccess" % guavaFailureaccessVersion, "com.google.protobuf" % "protobuf-java" % protoVersion % "protobuf" From c3671942abbd5d96d7d2c7496a882be91533838b Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Thu, 23 Nov 2023 20:11:43 +0900 Subject: [PATCH 125/521] [SPARK-46064][SQL][SS] Move out EliminateEventTimeWatermark to the analyzer and change to only take effect on resolved child This PR proposes to move out EliminateEventTimeWatermark to the analyzer (one of the analysis rule), and also make a change to eliminate EventTimeWatermark node only when the child of EventTimeWatermark is "resolved". Currently, we apply EliminateEventTimeWatermark immediately when withWatermark is called, which means the rule is applied immediately against the child, regardless whether child is resolved or not. It is not an issue for the usage of DataFrame API initiated by read / readStream, because streaming sources have the flag isStreaming set to true even it is yet resolved. But mix-up of SQL and DataFrame API would expose the issue; we may not know the exact value of isStreaming flag on unresolved node and it is subject to change upon resolution. No. New UTs. No. Closes #43971 from HeartSaVioR/SPARK-46064. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim (cherry picked from commit a703dace0aa400fa24b2bded1500f44ae7ac8db0) Signed-off-by: Jungtaek Lim --- .../sql/catalyst/analysis/Analyzer.scala | 6 +++-- .../sql/catalyst/analysis/AnalysisSuite.scala | 23 +++++++++++++++++++ .../sql/catalyst/analysis/AnalysisTest.scala | 2 ++ .../sql/catalyst/analysis/TestRelations.scala | 14 +++++++++++ .../optimizer/FilterPushdownSuite.scala | 8 +++---- .../scala/org/apache/spark/sql/Dataset.scala | 3 +-- 6 files changed, 48 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 02b9c2445433b..8fe87a05d02d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -348,7 +348,9 @@ class Analyzer(override val catalogManager: CatalogManager) extends RuleExecutor Batch("Cleanup", fixedPoint, CleanupAliases), Batch("HandleSpecialCommand", Once, - HandleSpecialCommand) + HandleSpecialCommand), + Batch("Remove watermark for batch query", Once, + EliminateEventTimeWatermark) ) /** @@ -3844,7 +3846,7 @@ object CleanupAliases extends Rule[LogicalPlan] with AliasHelper { object EliminateEventTimeWatermark extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning( _.containsPattern(EVENT_TIME_WATERMARK)) { - case EventTimeWatermark(_, _, child) if !child.isStreaming => child + case EventTimeWatermark(_, _, child) if child.resolved && !child.isStreaming => child } } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 802b6d471a65c..843d51034aa2b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1668,4 +1668,27 @@ class AnalysisSuite extends AnalysisTest with Matchers { checkAnalysis(ident2.select($"a"), testRelation.select($"a").analyze) } } + + test("SPARK-46064 Basic functionality of elimination for watermark node in batch query") { + val dfWithEventTimeWatermark = EventTimeWatermark($"ts", + IntervalUtils.fromIntervalString("10 seconds"), batchRelationWithTs) + + val analyzed = getAnalyzer.executeAndCheck(dfWithEventTimeWatermark, new QueryPlanningTracker) + + // EventTimeWatermark node is eliminated via EliminateEventTimeWatermark. + assert(!analyzed.exists(_.isInstanceOf[EventTimeWatermark])) + } + + test("SPARK-46064 EliminateEventTimeWatermark properly handles the case where the child of " + + "EventTimeWatermark changes the isStreaming flag during resolution") { + // UnresolvedRelation which is batch initially and will be resolved as streaming + val dfWithTempView = UnresolvedRelation(TableIdentifier("streamingTable")) + val dfWithEventTimeWatermark = EventTimeWatermark($"ts", + IntervalUtils.fromIntervalString("10 seconds"), dfWithTempView) + + val analyzed = getAnalyzer.executeAndCheck(dfWithEventTimeWatermark, new QueryPlanningTracker) + + // EventTimeWatermark node is NOT eliminated. + assert(analyzed.exists(_.isInstanceOf[EventTimeWatermark])) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala index 997308c6ef44f..5152666473286 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisTest.scala @@ -84,6 +84,8 @@ trait AnalysisTest extends PlanTest { createTempView(catalog, "TaBlE3", TestRelations.testRelation3, overrideIfExists = true) createGlobalTempView(catalog, "TaBlE4", TestRelations.testRelation4, overrideIfExists = true) createGlobalTempView(catalog, "TaBlE5", TestRelations.testRelation5, overrideIfExists = true) + createTempView(catalog, "streamingTable", TestRelations.streamingRelation, + overrideIfExists = true) new Analyzer(catalog) { override val extendedResolutionRules = extendedAnalysisRules } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala index d54237fcc1407..01b1a627e2871 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala @@ -68,4 +68,18 @@ object TestRelations { val mapRelation = LocalRelation( AttributeReference("map", MapType(IntegerType, IntegerType))()) + + val streamingRelation = LocalRelation( + Seq( + AttributeReference("a", IntegerType)(), + AttributeReference("ts", TimestampType)() + ), + isStreaming = true) + + val batchRelationWithTs = LocalRelation( + Seq( + AttributeReference("a", IntegerType)(), + AttributeReference("ts", TimestampType)() + ), + isStreaming = false) } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala index ee56d1fa9acd3..2ebb43d4fba3e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala @@ -1190,7 +1190,7 @@ class FilterPushdownSuite extends PlanTest { test("watermark pushdown: no pushdown on watermark attribute #1") { val interval = new CalendarInterval(2, 2, 2000L) - val relation = LocalRelation(attrA, $"b".timestamp, attrC) + val relation = LocalRelation(Seq(attrA, $"b".timestamp, attrC), Nil, isStreaming = true) // Verify that all conditions except the watermark touching condition are pushed down // by the optimizer and others are not. @@ -1205,7 +1205,7 @@ class FilterPushdownSuite extends PlanTest { test("watermark pushdown: no pushdown for nondeterministic filter") { val interval = new CalendarInterval(2, 2, 2000L) - val relation = LocalRelation(attrA, attrB, $"c".timestamp) + val relation = LocalRelation(Seq(attrA, attrB, $"c".timestamp), Nil, isStreaming = true) // Verify that all conditions except the watermark touching condition are pushed down // by the optimizer and others are not. @@ -1221,7 +1221,7 @@ class FilterPushdownSuite extends PlanTest { test("watermark pushdown: full pushdown") { val interval = new CalendarInterval(2, 2, 2000L) - val relation = LocalRelation(attrA, attrB, $"c".timestamp) + val relation = LocalRelation(Seq(attrA, attrB, $"c".timestamp), Nil, isStreaming = true) // Verify that all conditions except the watermark touching condition are pushed down // by the optimizer and others are not. @@ -1236,7 +1236,7 @@ class FilterPushdownSuite extends PlanTest { test("watermark pushdown: no pushdown on watermark attribute #2") { val interval = new CalendarInterval(2, 2, 2000L) - val relation = LocalRelation($"a".timestamp, attrB, attrC) + val relation = LocalRelation(Seq($"a".timestamp, attrB, attrC), Nil, isStreaming = true) val originalQuery = EventTimeWatermark($"a", interval, relation) .where($"a" === new java.sql.Timestamp(0) && $"b" === 10) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index f53c6ddaa3880..c063af9381ff2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -774,8 +774,7 @@ class Dataset[T] private[sql]( val parsedDelay = IntervalUtils.fromIntervalString(delayThreshold) require(!IntervalUtils.isNegative(parsedDelay), s"delay threshold ($delayThreshold) should not be negative.") - EliminateEventTimeWatermark( - EventTimeWatermark(UnresolvedAttribute(eventTime), parsedDelay, logicalPlan)) + EventTimeWatermark(UnresolvedAttribute(eventTime), parsedDelay, logicalPlan) } /** From a8552627cdc0945c52d2ae5115b1218c1254264d Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Thu, 23 Nov 2023 22:32:16 +0900 Subject: [PATCH 126/521] [SPARK-46062][SQL] Sync the isStreaming flag between CTE definition and reference This PR proposes to sync the flag `isStreaming` from CTE definition to CTE reference. The essential issue is that CTE reference node cannot determine the flag `isStreaming` by itself, and never be able to have a proper value and always takes the default as it does not have a parameter in constructor. 
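As a rough illustration of the affected shape (assuming an active `spark` session; the rate source, view name, and query are illustrative and are not the test added below):

```scala
// A CTE defined over a streaming temp view: before this fix, the CTERelationRef
// created for `w` reported isStreaming = false (the LeafNode default) even though
// its CTE definition is streaming.
val stream = spark.readStream.format("rate").load()
stream.createOrReplaceTempView("events")
val agg = spark.sql("WITH w AS (SELECT value FROM events) SELECT count(*) AS c FROM w")
```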
The other flag `resolved` is handled, and we need to do the same for `isStreaming`. Once we add the parameter to the constructor, we will also need to make sure the flag is in sync with CTE definition. We have a rule `ResolveWithCTE` doing the sync, hence we add the logic to sync the flag `isStreaming` as well. The bug may impact some rules which behaves differently depending on isStreaming flag. It would no longer be a problem once CTE reference is replaced with CTE definition at some point in "optimization phase", but all rules in analyzer and optimizer being triggered before the rule takes effect may misbehave based on incorrect isStreaming flag. No. New UT. No. Closes #43966 from HeartSaVioR/SPARK-46062. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim (cherry picked from commit 43046631a5d4ac7201361a00473cc87fa52ab5a7) Signed-off-by: Jungtaek Lim --- .../catalyst/analysis/CTESubstitution.scala | 2 +- .../catalyst/analysis/ResolveWithCTE.scala | 2 +- .../optimizer/MergeScalarSubqueries.scala | 3 +- ...wnPredicatesAndPruneColumnsForCTEDef.scala | 2 +- .../plans/logical/basicLogicalOperators.scala | 1 + .../sql/catalyst/analysis/AnalysisSuite.scala | 27 +++++-- .../MergeScalarSubqueriesSuite.scala | 3 +- .../double-quoted-identifiers-enabled.sql.out | 2 +- .../analyzer-results/cte-nested.sql.out | 38 +++++----- .../analyzer-results/cte-nonlegacy.sql.out | 64 ++++++++--------- .../sql-tests/analyzer-results/cte.sql.out | 70 +++++++++---------- .../analyzer-results/join-lateral.sql.out | 4 +- .../non-excludable-rule.sql.out | 2 +- .../postgreSQL/window_part3.sql.out | 8 +-- .../analyzer-results/postgreSQL/with.sql.out | 12 ++-- .../exists-subquery/exists-cte.sql.out | 16 ++--- .../in-subquery/in-multiple-columns.sql.out | 4 +- .../subquery/in-subquery/in-with-cte.sql.out | 64 ++++++++--------- .../scalar-subquery-select.sql.out | 20 +++--- .../analyzer-results/transform.sql.out | 4 +- .../analyzer-results/using-join.sql.out | 4 +- .../sql/streaming/StreamingQuerySuite.scala | 47 ++++++++++++- 22 files changed, 231 insertions(+), 168 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala index 954f5f19cd3ec..7321f5becdc48 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CTESubstitution.scala @@ -263,7 +263,7 @@ object CTESubstitution extends Rule[LogicalPlan] { d.child } else { // Add a `SubqueryAlias` for hint-resolving rules to match relation names. 
- SubqueryAlias(table, CTERelationRef(d.id, d.resolved, d.output)) + SubqueryAlias(table, CTERelationRef(d.id, d.resolved, d.output, d.isStreaming)) } }.getOrElse(u) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala index 78b776f12f074..f1077378b2d9c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveWithCTE.scala @@ -51,7 +51,7 @@ object ResolveWithCTE extends Rule[LogicalPlan] { case ref: CTERelationRef if !ref.resolved => cteDefMap.get(ref.cteId).map { cteDef => - CTERelationRef(cteDef.id, cteDef.resolved, cteDef.output) + CTERelationRef(cteDef.id, cteDef.resolved, cteDef.output, cteDef.isStreaming) }.getOrElse { ref } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueries.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueries.scala index 6184160829ba6..ff0bc5e66d755 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueries.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueries.scala @@ -381,7 +381,8 @@ object MergeScalarSubqueries extends Rule[LogicalPlan] { val subqueryCTE = header.plan.asInstanceOf[CTERelationDef] GetStructField( ScalarSubquery( - CTERelationRef(subqueryCTE.id, _resolved = true, subqueryCTE.output), + CTERelationRef(subqueryCTE.id, _resolved = true, subqueryCTE.output, + subqueryCTE.isStreaming), exprId = ssr.exprId), ssr.headerIndex) } else { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala index e643a1af363a1..aa13e6a67c510 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala @@ -141,7 +141,7 @@ object PushdownPredicatesAndPruneColumnsForCTEDef extends Rule[LogicalPlan] { cteDef } - case cteRef @ CTERelationRef(cteId, _, output, _) => + case cteRef @ CTERelationRef(cteId, _, output, _, _) => val (cteDef, _, _, newAttrSet) = cteMap(cteId) if (needsPruning(cteDef.child, newAttrSet)) { val indices = newAttrSet.toSeq.map(cteDef.output.indexOf) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 0e460706fc5b8..b4d7716a566e4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -853,6 +853,7 @@ case class CTERelationRef( cteId: Long, _resolved: Boolean, override val output: Seq[Attribute], + override val isStreaming: Boolean, statsOpt: Option[Statistics] = None) extends LeafNode with MultiInstanceRelation { final override val nodePatterns: Seq[TreePattern] = Seq(CTE) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 843d51034aa2b..8e5329d986ef7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -1517,7 +1517,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { test("SPARK-43030: deduplicate relations in CTE relation definitions") { val join = testRelation.as("left").join(testRelation.as("right")) val cteDef = CTERelationDef(join) - val cteRef = CTERelationRef(cteDef.id, false, Nil) + val cteRef = CTERelationRef(cteDef.id, false, Nil, false) withClue("flat CTE") { val plan = WithCTE(cteRef.select($"left.a"), Seq(cteDef)).analyze @@ -1530,7 +1530,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { withClue("nested CTE") { val cteDef2 = CTERelationDef(WithCTE(cteRef.join(testRelation), Seq(cteDef))) - val cteRef2 = CTERelationRef(cteDef2.id, false, Nil) + val cteRef2 = CTERelationRef(cteDef2.id, false, Nil, false) val plan = WithCTE(cteRef2, Seq(cteDef2)).analyze val relations = plan.collect { case r: LocalRelation => r @@ -1542,7 +1542,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { test("SPARK-43030: deduplicate CTE relation references") { val cteDef = CTERelationDef(testRelation.select($"a")) - val cteRef = CTERelationRef(cteDef.id, false, Nil) + val cteRef = CTERelationRef(cteDef.id, false, Nil, false) withClue("single reference") { val plan = WithCTE(cteRef.where($"a" > 1), Seq(cteDef)).analyze @@ -1565,7 +1565,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { withClue("CTE relation has duplicated attributes") { val cteDef = CTERelationDef(testRelation.select($"a", $"a")) - val cteRef = CTERelationRef(cteDef.id, false, Nil) + val cteRef = CTERelationRef(cteDef.id, false, Nil, false) val plan = WithCTE(cteRef.join(cteRef.select($"a")), Seq(cteDef)).analyze val refs = plan.collect { case r: CTERelationRef => r @@ -1577,14 +1577,14 @@ class AnalysisSuite extends AnalysisTest with Matchers { withClue("CTE relation has duplicate aliases") { val alias = Alias($"a", "x")() val cteDef = CTERelationDef(testRelation.select(alias, alias).where($"x" === 1)) - val cteRef = CTERelationRef(cteDef.id, false, Nil) + val cteRef = CTERelationRef(cteDef.id, false, Nil, false) // Should not fail with the assertion failure: Found duplicate rewrite attributes. WithCTE(cteRef.join(cteRef), Seq(cteDef)).analyze } withClue("references in both CTE relation definition and main query") { val cteDef2 = CTERelationDef(cteRef.where($"a" > 2)) - val cteRef2 = CTERelationRef(cteDef2.id, false, Nil) + val cteRef2 = CTERelationRef(cteDef2.id, false, Nil, false) val plan = WithCTE(cteRef.union(cteRef2), Seq(cteDef, cteDef2)).analyze val refs = plan.collect { case r: CTERelationRef => r @@ -1691,4 +1691,19 @@ class AnalysisSuite extends AnalysisTest with Matchers { // EventTimeWatermark node is NOT eliminated. assert(analyzed.exists(_.isInstanceOf[EventTimeWatermark])) } + + test("SPARK-46062: isStreaming flag is synced from CTE definition to CTE reference") { + val cteDef = CTERelationDef(streamingRelation.select($"a", $"ts")) + // Intentionally marking the flag _resolved to false, so that analyzer has a chance to sync + // the flag isStreaming on syncing the flag _resolved. 
+ val cteRef = CTERelationRef(cteDef.id, _resolved = false, Nil, isStreaming = false) + val plan = WithCTE(cteRef, Seq(cteDef)).analyze + + val refs = plan.collect { + case r: CTERelationRef => r + } + assert(refs.length == 1) + assert(refs.head.resolved) + assert(refs.head.isStreaming) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala index 8af0e02855b12..13e138414781f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/MergeScalarSubqueriesSuite.scala @@ -42,7 +42,8 @@ class MergeScalarSubqueriesSuite extends PlanTest { } private def extractorExpression(cteIndex: Int, output: Seq[Attribute], fieldIndex: Int) = { - GetStructField(ScalarSubquery(CTERelationRef(cteIndex, _resolved = true, output)), fieldIndex) + GetStructField(ScalarSubquery( + CTERelationRef(cteIndex, _resolved = true, output, isStreaming = false)), fieldIndex) .as("scalarsubquery()") } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-enabled.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-enabled.sql.out index 0a009a3a282f9..b45e461264e27 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-enabled.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/ansi/double-quoted-identifiers-enabled.sql.out @@ -434,7 +434,7 @@ Project [a1#x AS a2#x] : +- OneRowRelation +- Project [a#x] +- SubqueryAlias v - +- CTERelationRef xxxx, true, [a#x] + +- CTERelationRef xxxx, true, [a#x], false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out index d96965edde136..de0e6dfae2ce3 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nested.sql.out @@ -15,10 +15,10 @@ WithCTE : +- SubqueryAlias t : +- Project [1#x] : +- SubqueryAlias t2 -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false +- Project [1#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [1#x] + +- CTERelationRef xxxx, true, [1#x], false -- !query @@ -37,7 +37,7 @@ Aggregate [max(c#x) AS max(c)#x] : +- OneRowRelation +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x] + +- CTERelationRef xxxx, true, [c#x], false -- !query @@ -54,7 +54,7 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] : : +- OneRowRelation : +- Project [1#x] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false +- OneRowRelation @@ -136,11 +136,11 @@ WithCTE : : : +- OneRowRelation : : +- Project [c#x] : : +- SubqueryAlias t -: : +- CTERelationRef xxxx, true, [c#x] +: : +- CTERelationRef xxxx, true, [c#x], false : +- OneRowRelation +- Project [scalarsubquery()#x] +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [scalarsubquery()#x] + +- CTERelationRef xxxx, true, [scalarsubquery()#x], false -- !query @@ -189,7 +189,7 @@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x] + +- CTERelationRef xxxx, true, [c#x], false -- !query @@ -218,7 +218,7 
@@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x] + +- CTERelationRef xxxx, true, [c#x], false -- !query @@ -253,7 +253,7 @@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x] + +- CTERelationRef xxxx, true, [c#x], false -- !query @@ -352,14 +352,14 @@ WithCTE : +- SubqueryAlias t : +- Project [1#x] : +- SubqueryAlias t2 -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias t2 : +- Project [2 AS 2#x] : +- OneRowRelation +- Project [1#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [1#x] + +- CTERelationRef xxxx, true, [1#x], false -- !query @@ -420,15 +420,15 @@ WithCTE : +- SubqueryAlias t3 : +- Project [1#x] : +- SubqueryAlias t1 -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias t2 : +- Project [1#x] : +- SubqueryAlias t3 -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false +- Project [1#x] +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [1#x] + +- CTERelationRef xxxx, true, [1#x], false -- !query @@ -451,12 +451,12 @@ WithCTE : +- SubqueryAlias cte_inner : +- Project [1#x] : +- SubqueryAlias cte_outer -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false +- Project [1#x] +- SubqueryAlias __auto_generated_subquery_name +- Project [1#x] +- SubqueryAlias cte_inner - +- CTERelationRef xxxx, true, [1#x] + +- CTERelationRef xxxx, true, [1#x], false -- !query @@ -484,19 +484,19 @@ WithCTE : +- SubqueryAlias cte_inner_inner : +- Project [1#x] : +- SubqueryAlias cte_outer -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias cte_inner : +- Project [1#x] : +- SubqueryAlias __auto_generated_subquery_name : +- Project [1#x] : +- SubqueryAlias cte_inner_inner -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false +- Project [1#x] +- SubqueryAlias __auto_generated_subquery_name +- Project [1#x] +- SubqueryAlias cte_inner - +- CTERelationRef xxxx, true, [1#x] + +- CTERelationRef xxxx, true, [1#x], false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out index bd9b443d01d0a..f1a302b06f2a8 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte-nonlegacy.sql.out @@ -15,10 +15,10 @@ WithCTE : +- SubqueryAlias t : +- Project [1#x] : +- SubqueryAlias t2 -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false +- Project [1#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [1#x] + +- CTERelationRef xxxx, true, [1#x], false -- !query @@ -37,7 +37,7 @@ Aggregate [max(c#x) AS max(c)#x] : +- OneRowRelation +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x] + +- CTERelationRef xxxx, true, [c#x], false -- !query @@ -54,7 +54,7 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] : : +- OneRowRelation : +- Project [1#x] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false +- OneRowRelation @@ -106,10 +106,10 @@ WithCTE : +- SubqueryAlias t2 : +- Project [2#x] : +- SubqueryAlias t -: 
+- CTERelationRef xxxx, true, [2#x] +: +- CTERelationRef xxxx, true, [2#x], false +- Project [2#x] +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [2#x] + +- CTERelationRef xxxx, true, [2#x], false -- !query @@ -144,11 +144,11 @@ WithCTE : : : +- OneRowRelation : : +- Project [c#x] : : +- SubqueryAlias t -: : +- CTERelationRef xxxx, true, [c#x] +: : +- CTERelationRef xxxx, true, [c#x], false : +- OneRowRelation +- Project [scalarsubquery()#x] +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [scalarsubquery()#x] + +- CTERelationRef xxxx, true, [scalarsubquery()#x], false -- !query @@ -181,15 +181,15 @@ WithCTE : +- SubqueryAlias t2 : +- Project [3#x] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [3#x] +: +- CTERelationRef xxxx, true, [3#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias t2 : +- Project [3#x] : +- SubqueryAlias t2 -: +- CTERelationRef xxxx, true, [3#x] +: +- CTERelationRef xxxx, true, [3#x], false +- Project [3#x] +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [3#x] + +- CTERelationRef xxxx, true, [3#x], false -- !query @@ -214,7 +214,7 @@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x] + +- CTERelationRef xxxx, true, [c#x], false -- !query @@ -243,7 +243,7 @@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x] + +- CTERelationRef xxxx, true, [c#x], false -- !query @@ -278,7 +278,7 @@ WithCTE +- SubqueryAlias __auto_generated_subquery_name +- Project [c#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x] + +- CTERelationRef xxxx, true, [c#x], false -- !query @@ -301,7 +301,7 @@ WithCTE : : +- OneRowRelation : +- Project [2#x] : +- SubqueryAlias t - : +- CTERelationRef xxxx, true, [2#x] + : +- CTERelationRef xxxx, true, [2#x], false +- OneRowRelation @@ -328,7 +328,7 @@ WithCTE : : : +- OneRowRelation : : +- Project [2#x] : : +- SubqueryAlias t - : : +- CTERelationRef xxxx, true, [2#x] + : : +- CTERelationRef xxxx, true, [2#x], false : +- OneRowRelation +- OneRowRelation @@ -362,7 +362,7 @@ WithCTE : : : +- OneRowRelation : : +- Project [3#x] : : +- SubqueryAlias t - : : +- CTERelationRef xxxx, true, [3#x] + : : +- CTERelationRef xxxx, true, [3#x], false : +- OneRowRelation +- OneRowRelation @@ -391,9 +391,9 @@ WithCTE : : +- OneRowRelation : +- Project [c#x] : +- SubqueryAlias t - : +- CTERelationRef xxxx, true, [c#x] + : +- CTERelationRef xxxx, true, [c#x], false +- SubqueryAlias t - +- CTERelationRef xxxx, true, [c#x] + +- CTERelationRef xxxx, true, [c#x], false -- !query @@ -414,14 +414,14 @@ WithCTE : +- SubqueryAlias t : +- Project [1#x] : +- SubqueryAlias t2 -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias t2 : +- Project [2 AS 2#x] : +- OneRowRelation +- Project [1#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [1#x] + +- CTERelationRef xxxx, true, [1#x], false -- !query @@ -446,10 +446,10 @@ WithCTE : +- SubqueryAlias t : +- Project [2#x] : +- SubqueryAlias aBC -: +- CTERelationRef xxxx, true, [2#x] +: +- CTERelationRef xxxx, true, [2#x], false +- Project [2#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [2#x] + +- CTERelationRef xxxx, true, [2#x], false -- !query @@ -472,7 +472,7 @@ WithCTE : : +- OneRowRelation : +- Project [2#x] : +- SubqueryAlias aBC - : +- CTERelationRef xxxx, true, [2#x] + : +- CTERelationRef xxxx, true, [2#x], false +- OneRowRelation @@ -496,15 +496,15 
@@ WithCTE : +- SubqueryAlias t3 : +- Project [1#x] : +- SubqueryAlias t1 -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias t2 : +- Project [1#x] : +- SubqueryAlias t3 -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false +- Project [1#x] +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [1#x] + +- CTERelationRef xxxx, true, [1#x], false -- !query @@ -527,12 +527,12 @@ WithCTE : +- SubqueryAlias cte_inner : +- Project [1#x] : +- SubqueryAlias cte_outer -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false +- Project [1#x] +- SubqueryAlias __auto_generated_subquery_name +- Project [1#x] +- SubqueryAlias cte_inner - +- CTERelationRef xxxx, true, [1#x] + +- CTERelationRef xxxx, true, [1#x], false -- !query @@ -560,19 +560,19 @@ WithCTE : +- SubqueryAlias cte_inner_inner : +- Project [1#x] : +- SubqueryAlias cte_outer -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias cte_inner : +- Project [1#x] : +- SubqueryAlias __auto_generated_subquery_name : +- Project [1#x] : +- SubqueryAlias cte_inner_inner -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false +- Project [1#x] +- SubqueryAlias __auto_generated_subquery_name +- Project [1#x] +- SubqueryAlias cte_inner - +- CTERelationRef xxxx, true, [1#x] + +- CTERelationRef xxxx, true, [1#x], false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out index b9a0f776528d8..e817aaf9e59ff 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/cte.sql.out @@ -73,7 +73,7 @@ WithCTE : +- LocalRelation [id#x] +- Project [1#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [1#x] + +- CTERelationRef xxxx, true, [1#x], false -- !query @@ -113,13 +113,13 @@ WithCTE : +- SubqueryAlias t2 : +- Project [2 AS 2#x] : +- SubqueryAlias t1 -: +- CTERelationRef xxxx, true, [id#x] +: +- CTERelationRef xxxx, true, [id#x], false +- Project [id#x, 2#x] +- Join Cross :- SubqueryAlias t1 - : +- CTERelationRef xxxx, true, [id#x] + : +- CTERelationRef xxxx, true, [id#x], false +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [2#x] + +- CTERelationRef xxxx, true, [2#x], false -- !query @@ -157,10 +157,10 @@ WithCTE +- Join Cross :- SubqueryAlias t1 : +- SubqueryAlias CTE1 - : +- CTERelationRef xxxx, true, [id#x] + : +- CTERelationRef xxxx, true, [id#x], false +- SubqueryAlias t2 +- SubqueryAlias CTE1 - +- CTERelationRef xxxx, true, [id#x] + +- CTERelationRef xxxx, true, [id#x], false -- !query @@ -176,7 +176,7 @@ WithCTE +- Project [x#x] +- Filter (x#x = 1) +- SubqueryAlias t - +- CTERelationRef xxxx, true, [x#x] + +- CTERelationRef xxxx, true, [x#x], false -- !query @@ -192,7 +192,7 @@ WithCTE +- Project [x#x, y#x] +- Filter ((x#x = 1) AND (y#x = 2)) +- SubqueryAlias t - +- CTERelationRef xxxx, true, [x#x, y#x] + +- CTERelationRef xxxx, true, [x#x, y#x], false -- !query @@ -207,7 +207,7 @@ WithCTE : +- OneRowRelation +- Project [x#x, x#x] +- SubqueryAlias t - +- CTERelationRef xxxx, true, [x#x, x#x] + +- CTERelationRef xxxx, true, [x#x, x#x], false -- !query @@ -309,46 +309,46 @@ WithCTE : +- Project [c8#x AS c7#x] : +- Project [c8#x] : +- SubqueryAlias w8 -: +- CTERelationRef xxxx, true, [c8#x] +: +- CTERelationRef xxxx, true, 
[c8#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias w6 : +- Project [c7#x AS c6#x] : +- Project [c7#x] : +- SubqueryAlias w7 -: +- CTERelationRef xxxx, true, [c7#x] +: +- CTERelationRef xxxx, true, [c7#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias w5 : +- Project [c6#x AS c5#x] : +- Project [c6#x] : +- SubqueryAlias w6 -: +- CTERelationRef xxxx, true, [c6#x] +: +- CTERelationRef xxxx, true, [c6#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias w4 : +- Project [c5#x AS c4#x] : +- Project [c5#x] : +- SubqueryAlias w5 -: +- CTERelationRef xxxx, true, [c5#x] +: +- CTERelationRef xxxx, true, [c5#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias w3 : +- Project [c4#x AS c3#x] : +- Project [c4#x] : +- SubqueryAlias w4 -: +- CTERelationRef xxxx, true, [c4#x] +: +- CTERelationRef xxxx, true, [c4#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias w2 : +- Project [c3#x AS c2#x] : +- Project [c3#x] : +- SubqueryAlias w3 -: +- CTERelationRef xxxx, true, [c3#x] +: +- CTERelationRef xxxx, true, [c3#x], false :- CTERelationDef xxxx, false : +- SubqueryAlias w1 : +- Project [c2#x AS c1#x] : +- Project [c2#x] : +- SubqueryAlias w2 -: +- CTERelationRef xxxx, true, [c2#x] +: +- CTERelationRef xxxx, true, [c2#x], false +- Project [c1#x] +- SubqueryAlias w1 - +- CTERelationRef xxxx, true, [c1#x] + +- CTERelationRef xxxx, true, [c1#x], false -- !query @@ -384,7 +384,7 @@ WithCTE +- Project [42#x, 10#x] +- Join Inner :- SubqueryAlias same_name - : +- CTERelationRef xxxx, true, [42#x] + : +- CTERelationRef xxxx, true, [42#x], false +- SubqueryAlias same_name +- Project [10 AS 10#x] +- OneRowRelation @@ -423,7 +423,7 @@ WithCTE : +- OneRowRelation +- Project [x#x, typeof(x#x) AS typeof(x)#x] +- SubqueryAlias q - +- CTERelationRef xxxx, true, [x#x] + +- CTERelationRef xxxx, true, [x#x], false -- !query @@ -483,7 +483,7 @@ Project [y#x] : +- OneRowRelation +- Project [(x#x + 1) AS y#x] +- SubqueryAlias q - +- CTERelationRef xxxx, true, [x#x] + +- CTERelationRef xxxx, true, [x#x], false -- !query @@ -497,7 +497,7 @@ Project [scalar-subquery#x [] AS scalarsubquery()#x] : : +- OneRowRelation : +- Project [x#x] : +- SubqueryAlias q -: +- CTERelationRef xxxx, true, [x#x] +: +- CTERelationRef xxxx, true, [x#x], false +- OneRowRelation @@ -512,7 +512,7 @@ Project [1 IN (list#x []) AS (1 IN (listquery()))#x] : : +- OneRowRelation : +- Project [1#x] : +- SubqueryAlias q -: +- CTERelationRef xxxx, true, [1#x] +: +- CTERelationRef xxxx, true, [1#x], false +- OneRowRelation @@ -560,14 +560,14 @@ WithCTE :- Join Inner : :- SubqueryAlias x : : +- SubqueryAlias T1 - : : +- CTERelationRef xxxx, true, [a#x] + : : +- CTERelationRef xxxx, true, [a#x], false : +- SubqueryAlias y : +- Project [b#x] : +- SubqueryAlias T1 - : +- CTERelationRef xxxx, true, [b#x] + : +- CTERelationRef xxxx, true, [b#x], false +- SubqueryAlias z +- SubqueryAlias T1 - +- CTERelationRef xxxx, true, [a#x] + +- CTERelationRef xxxx, true, [a#x], false -- !query @@ -595,9 +595,9 @@ WithCTE +- Project [c#x, a#x] +- Join Inner :- SubqueryAlias ttTT - : +- CTERelationRef xxxx, true, [c#x] + : +- CTERelationRef xxxx, true, [c#x], false +- SubqueryAlias tttT_2 - +- CTERelationRef xxxx, true, [a#x] + +- CTERelationRef xxxx, true, [a#x], false -- !query @@ -613,7 +613,7 @@ Project [scalar-subquery#x [x#x] AS scalarsubquery(x)#x] : : +- OneRowRelation : +- Project [x#x] : +- SubqueryAlias q -: +- CTERelationRef xxxx, true, [x#x] +: +- CTERelationRef xxxx, true, [x#x], false +- SubqueryAlias T +- Project [1 AS x#x, 2 AS y#x] 
+- OneRowRelation @@ -632,7 +632,7 @@ Project [scalar-subquery#x [x#x && y#x] AS scalarsubquery(x, y)#x] : : +- OneRowRelation : +- Project [((outer(x#x) + outer(y#x)) + z#x) AS ((outer(T.x) + outer(T.y)) + z)#x] : +- SubqueryAlias q -: +- CTERelationRef xxxx, true, [z#x] +: +- CTERelationRef xxxx, true, [z#x], false +- SubqueryAlias T +- Project [1 AS x#x, 2 AS y#x] +- OneRowRelation @@ -652,12 +652,12 @@ WithCTE : +- SubqueryAlias q2 : +- Project [x#x] : +- SubqueryAlias q1 -: +- CTERelationRef xxxx, true, [x#x] +: +- CTERelationRef xxxx, true, [x#x], false +- Project [x#x] +- SubqueryAlias __auto_generated_subquery_name +- Project [x#x] +- SubqueryAlias q2 - +- CTERelationRef xxxx, true, [x#x] + +- CTERelationRef xxxx, true, [x#x], false -- !query @@ -674,12 +674,12 @@ WithCTE : +- SubqueryAlias q1 : +- Project [(x#x + 1) AS (x + 1)#x] : +- SubqueryAlias q1 -: +- CTERelationRef xxxx, true, [x#x] +: +- CTERelationRef xxxx, true, [x#x], false +- Project [(x + 1)#x] +- SubqueryAlias __auto_generated_subquery_name +- Project [(x + 1)#x] +- SubqueryAlias q1 - +- CTERelationRef xxxx, true, [(x + 1)#x] + +- CTERelationRef xxxx, true, [(x + 1)#x], false -- !query @@ -720,9 +720,9 @@ WithCTE : +- Aggregate [max(j#x) AS max(j)#x] : +- SubqueryAlias cte2 : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [j#x] + : +- CTERelationRef xxxx, true, [j#x], false +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [j#x] + +- CTERelationRef xxxx, true, [j#x], false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out index 4c032b7cbf9a2..2c7b31f62c6f4 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/join-lateral.sql.out @@ -1310,10 +1310,10 @@ WithCTE : : +- Project [cast(col1#x as int) AS c1#x, cast(col2#x as int) AS c2#x] : : +- LocalRelation [col1#x, col2#x] : +- SubqueryAlias cte1 -: +- CTERelationRef xxxx, true, [c1#x] +: +- CTERelationRef xxxx, true, [c1#x], false +- Project [c1#x, c2#x] +- SubqueryAlias cte2 - +- CTERelationRef xxxx, true, [c1#x, c2#x] + +- CTERelationRef xxxx, true, [c1#x, c2#x], false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out index 305a59f01e443..b80bed6f7c2aa 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/non-excludable-rule.sql.out @@ -47,7 +47,7 @@ WithCTE +- Filter (id#xL > scalar-subquery#x []) : +- Aggregate [max(id#xL) AS max(id)#xL] : +- SubqueryAlias tmp - : +- CTERelationRef xxxx, true, [id#xL] + : +- CTERelationRef xxxx, true, [id#xL], false +- Range (0, 3, step=1, splits=None) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out index 6b6a37b4e7fb4..6698d1fb083f0 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/window_part3.sql.out @@ -98,7 +98,7 @@ WithCTE +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, 1)) AS sum(x) OVER (ORDER BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 
FOLLOWING)#xL], [x#xL ASC NULLS FIRST] +- Project [x#xL] +- SubqueryAlias cte - +- CTERelationRef xxxx, true, [x#xL] + +- CTERelationRef xxxx, true, [x#xL], false -- !query @@ -120,7 +120,7 @@ WithCTE +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RangeFrame, cast(-1 as bigint), cast(1 as bigint))) AS sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL], [x#xL ASC NULLS FIRST] +- Project [x#xL] +- SubqueryAlias cte - +- CTERelationRef xxxx, true, [x#xL] + +- CTERelationRef xxxx, true, [x#xL], false -- !query @@ -153,7 +153,7 @@ WithCTE +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RowFrame, -1, 1)) AS sum(x) OVER (ORDER BY x ASC NULLS FIRST ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)#xL], [x#xL ASC NULLS FIRST] +- Project [x#xL] +- SubqueryAlias cte - +- CTERelationRef xxxx, true, [x#xL] + +- CTERelationRef xxxx, true, [x#xL], false -- !query @@ -186,7 +186,7 @@ WithCTE +- Window [sum(x#xL) windowspecdefinition(x#xL ASC NULLS FIRST, specifiedwindowframe(RangeFrame, cast(-1 as bigint), cast(1 as bigint))) AS sum(x) OVER (ORDER BY x ASC NULLS FIRST RANGE BETWEEN (- 1) FOLLOWING AND 1 FOLLOWING)#xL], [x#xL ASC NULLS FIRST] +- Project [x#xL] +- SubqueryAlias cte - +- CTERelationRef xxxx, true, [x#xL] + +- CTERelationRef xxxx, true, [x#xL], false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out index c978c583152c5..b3ce967f2a6b5 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/with.sql.out @@ -12,10 +12,10 @@ WithCTE +- Project [x#x, y#x, x#x, y#x] +- Join Inner :- SubqueryAlias q1 - : +- CTERelationRef xxxx, true, [x#x, y#x] + : +- CTERelationRef xxxx, true, [x#x, y#x], false +- SubqueryAlias q2 +- SubqueryAlias q1 - +- CTERelationRef xxxx, true, [x#x, y#x] + +- CTERelationRef xxxx, true, [x#x, y#x], false -- !query @@ -194,7 +194,7 @@ WithCTE +- SubqueryAlias q +- Project [foo#x] +- SubqueryAlias cte - +- CTERelationRef xxxx, true, [foo#x] + +- CTERelationRef xxxx, true, [foo#x], false -- !query @@ -222,13 +222,13 @@ WithCTE : +- Union false, false : :- Project [2#x] : : +- SubqueryAlias innermost -: : +- CTERelationRef xxxx, true, [2#x] +: : +- CTERelationRef xxxx, true, [2#x], false : +- Project [3 AS 3#x] : +- OneRowRelation +- Sort [x#x ASC NULLS FIRST], true +- Project [x#x] +- SubqueryAlias outermost - +- CTERelationRef xxxx, true, [x#x] + +- CTERelationRef xxxx, true, [x#x], false -- !query @@ -418,7 +418,7 @@ WithCTE : +- OneRowRelation +- Project [x#x] +- SubqueryAlias ordinality - +- CTERelationRef xxxx, true, [x#x] + +- CTERelationRef xxxx, true, [x#x], false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out index 2cd6ba5356371..cab83b2649974 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/exists-subquery/exists-cte.sql.out @@ -133,7 +133,7 @@ WithCTE : +- Filter (outer(emp_name#x) = emp_name#x) : +- SubqueryAlias b : +- SubqueryAlias bonus_cte - : +- CTERelationRef xxxx, true, [emp_name#x, bonus_amt#x] + : +- CTERelationRef 
xxxx, true, [emp_name#x, bonus_amt#x], false +- SubqueryAlias a +- SubqueryAlias bonus +- View (`BONUS`, [emp_name#x,bonus_amt#x]) @@ -189,10 +189,10 @@ WithCTE : +- Join Inner, (dept_id#x = dept_id#x) : :- SubqueryAlias a : : +- SubqueryAlias emp_cte - : : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x], false : +- SubqueryAlias b : +- SubqueryAlias dept_cte - : +- CTERelationRef xxxx, true, [dept_id#x, dept_name#x, state#x] + : +- CTERelationRef xxxx, true, [dept_id#x, dept_name#x, state#x], false +- SubqueryAlias bonus +- View (`BONUS`, [emp_name#x,bonus_amt#x]) +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] @@ -253,10 +253,10 @@ WithCTE : +- Join LeftOuter, (dept_id#x = dept_id#x) : :- SubqueryAlias a : : +- SubqueryAlias emp_cte - : : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x], false : +- SubqueryAlias b : +- SubqueryAlias dept_cte - : +- CTERelationRef xxxx, true, [dept_id#x, dept_name#x, state#x] + : +- CTERelationRef xxxx, true, [dept_id#x, dept_name#x, state#x], false +- Join Inner :- Join Inner : :- SubqueryAlias b @@ -268,7 +268,7 @@ WithCTE : : +- LocalRelation [emp_name#x, bonus_amt#x] : +- SubqueryAlias e : +- SubqueryAlias emp_cte - : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x] + : +- CTERelationRef xxxx, true, [id#x, emp_name#x, hiredate#x, salary#x, dept_id#x], false +- SubqueryAlias d +- SubqueryAlias dept +- View (`DEPT`, [dept_id#x,dept_name#x,state#x]) @@ -322,7 +322,7 @@ WithCTE : +- Filter (count(1)#xL > cast(1 as bigint)) : +- Aggregate [dept_id#x], [dept_id#x, max(salary#x) AS max(salary)#x, count(1) AS count(1)#xL] : +- SubqueryAlias empdept - : +- CTERelationRef xxxx, true, [id#x, salary#x, emp_name#x, dept_id#x] + : +- CTERelationRef xxxx, true, [id#x, salary#x, emp_name#x, dept_id#x], false +- SubqueryAlias bonus +- View (`BONUS`, [emp_name#x,bonus_amt#x]) +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] @@ -375,7 +375,7 @@ WithCTE : +- Filter (count(1)#xL < cast(1 as bigint)) : +- Aggregate [dept_id#x], [dept_id#x, max(salary#x) AS max(salary)#x, count(1) AS count(1)#xL] : +- SubqueryAlias empdept - : +- CTERelationRef xxxx, true, [id#x, salary#x, emp_name#x, dept_id#x] + : +- CTERelationRef xxxx, true, [id#x, salary#x, emp_name#x, dept_id#x], false +- SubqueryAlias bonus +- View (`BONUS`, [emp_name#x,bonus_amt#x]) +- Project [cast(emp_name#x as string) AS emp_name#x, cast(bonus_amt#x as double) AS bonus_amt#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out index ab16f4b9d687c..1717e553f5c3c 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-multiple-columns.sql.out @@ -330,7 +330,7 @@ WithCTE +- Project [t1a#x, t1b#x, t1a#x, t1b#x] +- Join Inner, (t1b#x = t1b#x) :- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false +- SubqueryAlias cte2 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, 
true, [t1a#x, t1b#x] + +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out index 9d82c707177b7..6d0a944bfcfe2 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/in-subquery/in-with-cte.sql.out @@ -138,7 +138,7 @@ WithCTE : +- Project [t1b#x] : +- Filter (cast(t1b#x as int) > 0) : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false +- SubqueryAlias t1 +- View (`t1`, [t1a#x,t1b#x,t1c#x,t1d#xL,t1e#x,t1f#x,t1g#x,t1h#x,t1i#x]) +- Project [cast(t1a#x as string) AS t1a#x, cast(t1b#x as smallint) AS t1b#x, cast(t1c#x as int) AS t1c#x, cast(t1d#xL as bigint) AS t1d#xL, cast(t1e#x as float) AS t1e#x, cast(t1f#x as double) AS t1f#x, cast(t1g#x as double) AS t1g#x, cast(t1h#x as timestamp) AS t1h#x, cast(t1i#x as date) AS t1i#x] @@ -197,21 +197,21 @@ WithCTE : : : :- Project [t1b#x] : : : : +- Filter (cast(t1b#x as int) > 0) : : : : +- SubqueryAlias cte1 - : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false : : : +- Project [t1b#x] : : : +- Filter (cast(t1b#x as int) > 5) : : : +- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false : : +- Intersect false : : :- Project [t1b#x] : : : +- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false : : +- Project [t1b#x] : : +- SubqueryAlias cte1 - : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false : +- Project [t1b#x] : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false +- SubqueryAlias t1 +- View (`t1`, [t1a#x,t1b#x,t1c#x,t1d#xL,t1e#x,t1f#x,t1g#x,t1h#x,t1i#x]) +- Project [cast(t1a#x as string) AS t1a#x, cast(t1b#x as smallint) AS t1b#x, cast(t1c#x as int) AS t1c#x, cast(t1d#xL as bigint) AS t1d#xL, cast(t1e#x as float) AS t1e#x, cast(t1f#x as double) AS t1f#x, cast(t1g#x as double) AS t1g#x, cast(t1h#x as timestamp) AS t1h#x, cast(t1i#x as date) AS t1i#x] @@ -268,22 +268,22 @@ WithCTE : : : :- Join FullOuter, (t1c#x = t1c#x) : : : : :- Join Inner, (t1b#x > t1b#x) : : : : : :- SubqueryAlias cte1 - : : : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x] + : : : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false : : : : : +- SubqueryAlias cte2 : : : : : +- SubqueryAlias cte1 - : : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x] + : : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false : : : : +- SubqueryAlias cte3 : : : : +- SubqueryAlias cte1 - : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x] + : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false : : : +- SubqueryAlias cte4 : : : +- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x] + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false : : +- SubqueryAlias cte5 : : +- SubqueryAlias cte1 - : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, 
t1c#x, t1d#xL, t1e#x] + : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false : +- SubqueryAlias cte6 : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x], false +- SubqueryAlias t1 +- View (`t1`, [t1a#x,t1b#x,t1c#x,t1d#xL,t1e#x,t1f#x,t1g#x,t1h#x,t1i#x]) +- Project [cast(t1a#x as string) AS t1a#x, cast(t1b#x as smallint) AS t1b#x, cast(t1c#x as int) AS t1c#x, cast(t1d#xL as bigint) AS t1d#xL, cast(t1e#x as float) AS t1e#x, cast(t1f#x as double) AS t1f#x, cast(t1g#x as double) AS t1g#x, cast(t1h#x as timestamp) AS t1h#x, cast(t1i#x as date) AS t1i#x] @@ -354,16 +354,16 @@ WithCTE :- Join FullOuter, (t1a#x = t1a#x) : :- Join Inner, ((cast(t1b#x as int) > 5) AND (t1a#x = t1a#x)) : : :- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false : : +- SubqueryAlias cte2 : : +- SubqueryAlias cte1 - : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false : +- SubqueryAlias cte3 : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false +- SubqueryAlias cte4 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false -- !query @@ -424,10 +424,10 @@ WithCTE +- Project [t1a#x, t1b#x] +- Join Inner, (t1h#x >= t1h#x) :- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1h#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1h#x], false +- SubqueryAlias cte2 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1h#x] + +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1h#x], false -- !query @@ -485,16 +485,16 @@ WithCTE :- Join RightOuter, (t1b#x = t1b#x) : :- Join Inner, (t1a#x = t1a#x) : : :- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x] + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false : : +- SubqueryAlias cte2 : : +- SubqueryAlias cte1 - : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x] + : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false : +- SubqueryAlias cte3 : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false +- SubqueryAlias cte4 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x] + +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x], false -- !query @@ -538,10 +538,10 @@ WithCTE +- Project [t1a#x, t1b#x] +- Join RightOuter, (t1a#x = t1a#x) :- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false +- SubqueryAlias cte2 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false -- !query @@ -599,15 +599,15 @@ WithCTE : : +- SubqueryAlias t1 : : +- LocalRelation [t1a#x, t1b#x, t1c#x, t1d#xL, t1e#x, t1f#x, t1g#x, t1h#x, t1i#x] : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false +- SubqueryAlias s +- Project [t1b#x] +- Join LeftOuter, (t1b#x = t1b#x) :- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false +- SubqueryAlias cte2 +- SubqueryAlias cte1 - +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + +- CTERelationRef xxxx, 
true, [t1a#x, t1b#x], false -- !query @@ -642,7 +642,7 @@ WithCTE : +- Project [t1b#x] : +- Filter (cast(t1b#x as int) < 0) : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x], false +- SubqueryAlias t1 +- View (`t1`, [t1a#x,t1b#x,t1c#x,t1d#xL,t1e#x,t1f#x,t1g#x,t1h#x,t1i#x]) +- Project [cast(t1a#x as string) AS t1a#x, cast(t1b#x as smallint) AS t1b#x, cast(t1c#x as int) AS t1c#x, cast(t1d#xL as bigint) AS t1d#xL, cast(t1e#x as float) AS t1e#x, cast(t1f#x as double) AS t1f#x, cast(t1g#x as double) AS t1g#x, cast(t1h#x as timestamp) AS t1h#x, cast(t1i#x as date) AS t1i#x] @@ -722,16 +722,16 @@ WithCTE : :- Join RightOuter, (t1b#x = t1b#x) : : :- Join Inner, (t1a#x = t1a#x) : : : :- SubqueryAlias cte1 - : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x] + : : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false : : : +- SubqueryAlias cte2 : : : +- SubqueryAlias cte1 - : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x] + : : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false : : +- SubqueryAlias cte3 : : +- SubqueryAlias cte1 - : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x] + : : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false : +- SubqueryAlias cte4 : +- SubqueryAlias cte1 - : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x] + : +- CTERelationRef xxxx, true, [t1a#x, t1b#x, t1c#x, t1d#xL, t1h#x], false +- SubqueryAlias t1 +- View (`t1`, [t1a#x,t1b#x,t1c#x,t1d#xL,t1e#x,t1f#x,t1g#x,t1h#x,t1i#x]) +- Project [cast(t1a#x as string) AS t1a#x, cast(t1b#x as smallint) AS t1b#x, cast(t1c#x as int) AS t1c#x, cast(t1d#xL as bigint) AS t1d#xL, cast(t1e#x as float) AS t1e#x, cast(t1f#x as double) AS t1f#x, cast(t1g#x as double) AS t1g#x, cast(t1h#x as timestamp) AS t1h#x, cast(t1i#x as date) AS t1i#x] diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out index cb41f7cdc4557..c7271d8b85628 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/subquery/scalar-subquery/scalar-subquery-select.sql.out @@ -623,7 +623,7 @@ Project [c1#x, scalar-subquery#x [c1#x] AS scalarsubquery(c1)#x] : : +- OneRowRelation : +- Project [(a#x + outer(c1#x)) AS (a + outer(t1.c1))#x] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [a#x] +: +- CTERelationRef xxxx, true, [a#x], false +- SubqueryAlias t1 +- View (`t1`, [c1#x,c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -647,7 +647,7 @@ Project [c1#x, scalar-subquery#x [c1#x] AS scalarsubquery(c1)#xL] : : +- LocalRelation [c1#x, c2#x] : +- Aggregate [sum(c2#x) AS sum(c2)#xL] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [c1#x, c2#x] +: +- CTERelationRef xxxx, true, [c1#x, c2#x], false +- SubqueryAlias t1 +- View (`t1`, [c1#x,c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -677,10 +677,10 @@ Project [c1#x, scalar-subquery#x [c1#x] AS scalarsubquery(c1)#xL] : : +- Project [c1#x, c2#x] : : +- Filter (outer(c1#x) = c1#x) : : +- SubqueryAlias t3 -: : +- CTERelationRef xxxx, true, [c1#x, c2#x] +: : +- CTERelationRef xxxx, true, [c1#x, c2#x], false : +- Aggregate [sum(c2#x) AS 
sum(c2)#xL] : +- SubqueryAlias t4 -: +- CTERelationRef xxxx, true, [c1#x, c2#x] +: +- CTERelationRef xxxx, true, [c1#x, c2#x], false +- SubqueryAlias t1 +- View (`t1`, [c1#x,c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -713,10 +713,10 @@ Project [c1#x, scalar-subquery#x [c1#x] AS scalarsubquery(c1)#xL] : +- Union false, false : :- Project [c1#x, c2#x] : : +- SubqueryAlias t -: : +- CTERelationRef xxxx, true, [c1#x, c2#x] +: : +- CTERelationRef xxxx, true, [c1#x, c2#x], false : +- Project [c2#x, c1#x] : +- SubqueryAlias t -: +- CTERelationRef xxxx, true, [c1#x, c2#x] +: +- CTERelationRef xxxx, true, [c1#x, c2#x], false +- SubqueryAlias t1 +- View (`t1`, [c1#x,c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -756,9 +756,9 @@ WithCTE : : +- Aggregate [sum(c2#x) AS sum(c2)#xL] : : +- Filter (c1#x = outer(c1#x)) : : +- SubqueryAlias t - : : +- CTERelationRef xxxx, true, [c1#x, c2#x] + : : +- CTERelationRef xxxx, true, [c1#x, c2#x], false : +- SubqueryAlias v - : +- CTERelationRef xxxx, true, [c1#x, c2#x] + : +- CTERelationRef xxxx, true, [c1#x, c2#x], false +- SubqueryAlias t1 +- View (`t1`, [c1#x,c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -779,7 +779,7 @@ WithCTE : +- Project [a#x] : +- Filter (a#x = outer(c1#x)) : +- SubqueryAlias t - : +- CTERelationRef xxxx, true, [a#x] + : +- CTERelationRef xxxx, true, [a#x], false +- SubqueryAlias t1 +- View (`t1`, [c1#x,c2#x]) +- Project [cast(c1#x as int) AS c1#x, cast(c2#x as int) AS c2#x] @@ -1027,7 +1027,7 @@ WithCTE : +- Aggregate [sum(1) AS sum(1)#xL] : +- Filter ((a#x = cast(outer(col#x) as int)) OR (upper(cast(outer(col#x) as string)) = Y)) : +- SubqueryAlias T - : +- CTERelationRef xxxx, true, [a#x] + : +- CTERelationRef xxxx, true, [a#x], false +- SubqueryAlias foo +- Project [null AS col#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out index cda76f716a8a8..ceca433a1c915 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/transform.sql.out @@ -888,10 +888,10 @@ WithCTE +- Join Inner, (b#x = b#x) :- SubqueryAlias t1 : +- SubqueryAlias temp - : +- CTERelationRef xxxx, true, [b#x] + : +- CTERelationRef xxxx, true, [b#x], false +- SubqueryAlias t2 +- SubqueryAlias temp - +- CTERelationRef xxxx, true, [b#x] + +- CTERelationRef xxxx, true, [b#x], false -- !query diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out index 0fe7254d7348c..97410d3cdd369 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/using-join.sql.out @@ -833,6 +833,6 @@ WithCTE +- Project [coalesce(key#x, key#x) AS key#x, key#x, key#x, key#x] +- Join FullOuter, (key#x = key#x) :- SubqueryAlias t1 - : +- CTERelationRef xxxx, true, [key#x] + : +- CTERelationRef xxxx, true, [key#x], false +- SubqueryAlias t2 - +- CTERelationRef xxxx, true, [key#x] + +- CTERelationRef xxxx, true, [key#x], false diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala index 4a6325eb06074..8565056cda6fa 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingQuerySuite.scala @@ -39,7 +39,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.{AnalysisException, Column, DataFrame, Dataset, Row, SaveMode} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.{Literal, Rand, Randn, Shuffle, Uuid} -import org.apache.spark.sql.catalyst.plans.logical.LocalRelation +import org.apache.spark.sql.catalyst.plans.logical.{CTERelationDef, CTERelationRef, LocalRelation} import org.apache.spark.sql.catalyst.streaming.InternalOutputModes.Complete import org.apache.spark.sql.catalyst.types.DataTypeUtils.toAttributes import org.apache.spark.sql.connector.read.InputPartition @@ -1318,6 +1318,51 @@ class StreamingQuerySuite extends StreamTest with BeforeAndAfter with Logging wi } } + test("SPARK-46062: streaming query reading from CTE, which refers to temp view from " + + "streaming source") { + val inputStream = MemoryStream[Int] + inputStream.toDF().createOrReplaceTempView("tv") + + val df = spark.sql( + """ + |WITH w as ( + | SELECT * FROM tv + |) + |SELECT value from w + |""".stripMargin) + + testStream(df)( + AddData(inputStream, 1, 2, 3), + CheckAnswer(1, 2, 3), + Execute { q => + var isStreamingForCteDef: Option[Boolean] = None + var isStreamingForCteRef: Option[Boolean] = None + + q.analyzedPlan.foreach { + case d: CTERelationDef => + assert(d.resolved, "The definition node must be resolved after analysis.") + isStreamingForCteDef = Some(d.isStreaming) + + case d: CTERelationRef => + assert(d.resolved, "The reference node must be marked as resolved after analysis.") + isStreamingForCteRef = Some(d.isStreaming) + + case _ => + } + + assert(isStreamingForCteDef.isDefined && isStreamingForCteRef.isDefined, + "Both definition and reference for CTE should be available in analyzed plan.") + + assert(isStreamingForCteDef.get, "Expected isStreaming=true for CTE definition, but " + + "isStreaming is set to false.") + + assert(isStreamingForCteDef === isStreamingForCteRef, + "isStreaming flag should be carried over from definition to reference, " + + s"definition: ${isStreamingForCteDef.get}, reference: ${isStreamingForCteRef.get}.") + } + ) + } + private def checkExceptionMessage(df: DataFrame): Unit = { withTempDir { outputDir => withTempDir { checkpointDir => From 351a5f8c004a449013ab25acbcfdd85e9e7868b8 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Fri, 24 Nov 2023 19:38:31 +0900 Subject: [PATCH 127/521] [SPARK-46016][DOCS][PS] Fix pandas API support list properly ### What changes were proposed in this pull request? This PR proposes to fix a critical issue in the [Supported pandas API documentation](https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/supported_pandas_api.html) where many essential APIs such as `DataFrame.max`, `DataFrame.min`, `DataFrame.mean`, `and DataFrame.median`, etc. were incorrectly marked as not implemented - marked as "N" - as below: Screenshot 2023-11-24 at 12 37 49 PM The root cause of this issue was that the script used to generate the support list excluded functions inherited from parent classes. For instance, `CategoricalIndex.max` is actually supported by inheriting the `Index` class but was not directly implemented in `CategoricalIndex`, leading to it being marked as unsupported: Screenshot 2023-11-24 at 12 30 08 PM ### Why are the changes needed? 
The current documentation inaccurately represents the state of supported pandas API, which could significantly hinder user experience and adoption. By correcting these inaccuracies, we ensure that the documentation reflects the true capabilities of Pandas API on Spark, providing users with reliable and accurate information. ### Does this PR introduce _any_ user-facing change? No. This PR only updates the documentation to accurately reflect the current state of supported pandas API. ### How was this patch tested? Manually build documentation, and check if the supported pandas API list is correctly generated as below: Screenshot 2023-11-24 at 12 36 31 PM ### Was this patch authored or co-authored using generative AI tooling? No. Closes #43996 from itholic/fix_supported_api_gen. Authored-by: Haejoon Lee Signed-off-by: Hyukjin Kwon (cherry picked from commit 132bb63a897f4f4049f34deefc065ed3eac6a90f) Signed-off-by: Hyukjin Kwon --- python/pyspark/pandas/supported_api_gen.py | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/python/pyspark/pandas/supported_api_gen.py b/python/pyspark/pandas/supported_api_gen.py index 06591c5b26ad6..8c3cdec3671c1 100644 --- a/python/pyspark/pandas/supported_api_gen.py +++ b/python/pyspark/pandas/supported_api_gen.py @@ -138,23 +138,11 @@ def _create_supported_by_module( # module not implemented return {} - pd_funcs = dict( - [ - m - for m in getmembers(pd_module, isfunction) - if not m[0].startswith("_") and m[0] in pd_module.__dict__ - ] - ) + pd_funcs = dict([m for m in getmembers(pd_module, isfunction) if not m[0].startswith("_")]) if not pd_funcs: return {} - ps_funcs = dict( - [ - m - for m in getmembers(ps_module, isfunction) - if not m[0].startswith("_") and m[0] in ps_module.__dict__ - ] - ) + ps_funcs = dict([m for m in getmembers(ps_module, isfunction) if not m[0].startswith("_")]) return _organize_by_implementation_status( module_name, pd_funcs, ps_funcs, pd_module_group, ps_module_group From 132c1a1f08d6555c950600c102db28b9d7581350 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 24 Nov 2023 17:31:22 -0800 Subject: [PATCH 128/521] [SPARK-46095][DOCS] Document `REST API` for Spark Standalone Cluster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR aims to document `REST API` for Spark Standalone Cluster. To help the users to understand Apache Spark features. No. Manual review. `REST API` Section is added newly. **AFTER** Screenshot 2023-11-24 at 4 13 53 PM No. Closes #44007 from dongjoon-hyun/SPARK-46095. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- docs/spark-standalone.md | 68 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 9152547f1bec4..e7ea2669a1139 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -446,6 +446,8 @@ Spark applications supports the following configuration properties specific to s # Launching Spark Applications +## Spark Protocol + The [`spark-submit` script](submitting-applications.html) provides the most straightforward way to submit a compiled Spark application to the cluster. For standalone clusters, Spark currently supports two deploy modes. In `client` mode, the driver is launched in the same process as the @@ -468,6 +470,72 @@ failing repeatedly, you may do so through: You can find the driver ID through the standalone Master web UI at `http://:8080`. 
+## REST API
+
+If `spark.master.rest.enabled` is enabled, the Spark master provides an additional REST API at
+`http://[host:port]/[version]/submissions/[action]`, where
+`host` is the master host,
+`port` is the port number specified by `spark.master.rest.port` (default: 6066),
+`version` is a protocol version (`v1` as of today), and
+`action` is one of the supported actions listed below.
+
<table class="table table-striped">
  <thead><tr><th>Property Name</th><th>Default</th><th>Meaning</th><th>Since Version</th></tr></thead>
  <tr>
    <td><code>spark.master.ui.port</code></td>
    <td>8080</td>
    <td>Specifies the port number of the Master Web UI endpoint.</td>
    <td>1.1.0</td>
  </tr>
  <tr>
    <td><code>spark.master.ui.decommission.allow.mode</code></td>
    <td>LOCAL</td>
    <td>Specifies the behavior of the Master Web UI's /workers/kill endpoint. Possible choices
    are: LOCAL means allow this endpoint from IP's that are local to the machine running
    the Master, DENY means to completely disable this endpoint, ALLOW means to allow
    calling this endpoint from any IP.</td>
    <td>3.1.0</td>
  </tr>
  <tr>
    <td><code>spark.master.rest.enabled</code></td>
    <td>false</td>
    <td>Whether to use the Master REST API endpoint or not.</td>
    <td>1.3.0</td>
  </tr>
  <tr>
    <td><code>spark.master.rest.port</code></td>
    <td>6066</td>
    <td>Specifies the port number of the Master REST API endpoint.</td>
    <td>1.3.0</td>
  </tr>
  <tr>
    <td><code>spark.deploy.retainedApplications</code></td>
    <td>200</td>
    <td></td>
    <td></td>
  </tr>
</table>
+<table class="table table-striped">
+  <thead><tr><th>Command</th><th>Description</th><th>HTTP METHOD</th><th>Since Version</th></tr></thead>
+  <tr>
+    <td><code>create</code></td>
+    <td>Create a Spark driver via cluster mode.</td>
+    <td>POST</td>
+    <td>1.3.0</td>
+  </tr>
+  <tr>
+    <td><code>kill</code></td>
+    <td>Kill a single Spark driver.</td>
+    <td>POST</td>
+    <td>1.3.0</td>
+  </tr>
+  <tr>
+    <td><code>status</code></td>
+    <td>Check the status of a Spark job.</td>
+    <td>GET</td>
+    <td>1.3.0</td>
+  </tr>
+</table>
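As a companion to the `curl` example that follows, here is a hedged sketch of the same `create` submission issued from Python using only the standard library. The master host, port, and application paths below are placeholders, not values taken from the patch.

```python
import json
from urllib import request

MASTER_HOST = "master"   # placeholder for your standalone master host
REST_PORT = 6066         # default of spark.master.rest.port

# Payload mirrors the fields shown in the curl example below.
payload = {
    "appResource": "",
    "sparkProperties": {
        "spark.master": f"spark://{MASTER_HOST}:7077",
        "spark.app.name": "Spark Pi",
        "spark.driver.memory": "1g",
        "spark.driver.cores": "1",
        "spark.jars": "",
    },
    "clientSparkVersion": "",
    "mainClass": "org.apache.spark.deploy.SparkSubmit",
    "environmentVariables": {},
    "action": "CreateSubmissionRequest",
    "appArgs": ["/opt/spark/examples/src/main/python/pi.py", "10"],
}

req = request.Request(
    f"http://{MASTER_HOST}:{REST_PORT}/v1/submissions/create",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json;charset=UTF-8"},
    method="POST",
)
with request.urlopen(req) as resp:
    # e.g. {"action": "CreateSubmissionResponse", ..., "success": true}
    print(json.load(resp))
```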
+ +The following is a curl CLI command example with the `pi.py` and REST API. + +```bash +$ curl -XPOST http://IP:PORT/v1/submissions/create \ +--header "Content-Type:application/json;charset=UTF-8" \ +--data '{ + "appResource": "", + "sparkProperties": { + "spark.master": "spark://master:7077", + "spark.app.name": "Spark Pi", + "spark.driver.memory": "1g", + "spark.driver.cores": "1", + "spark.jars": "" + }, + "clientSparkVersion": "", + "mainClass": "org.apache.spark.deploy.SparkSubmit", + "environmentVariables": { }, + "action": "CreateSubmissionRequest", + "appArgs": [ "/opt/spark/examples/src/main/python/pi.py", "10" ] +}' +``` + +The following is the response from the REST API for the above create request. + +```bash +{ + "action" : "CreateSubmissionResponse", + "message" : "Driver successfully submitted as driver-20231124153531-0000", + "serverSparkVersion" : "3.5.1", + "submissionId" : "driver-20231124153531-0000", + "success" : true +} +``` + + # Resource Scheduling The standalone cluster mode currently only supports a simple FIFO scheduler across applications. From e4731e9d3b4443f79a23e7d4bf5b749b54f2e1bb Mon Sep 17 00:00:00 2001 From: wforget <643348094@qq.com> Date: Sun, 26 Nov 2023 23:28:52 +0800 Subject: [PATCH 129/521] [SPARK-45974][SQL] Add scan.filterAttributes non-empty judgment for RowLevelOperationRuntimeGroupFiltering ### What changes were proposed in this pull request? Add scan.filterAttributes non-empty judgment for RowLevelOperationRuntimeGroupFiltering. ### Why are the changes needed? When scan.filterAttributes is empty, an invalid dynamic pruning condition will be generated in RowLevelOperationRuntimeGroupFiltering. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? added test case ### Was this patch authored or co-authored using generative AI tooling? No Closes #43869 from wForget/SPARK-45974. 
Authored-by: wforget <643348094@qq.com> Signed-off-by: Wenchen Fan (cherry picked from commit ade861d19910df724d9233df98c059ff9d57f795) Signed-off-by: Wenchen Fan --- ...wLevelOperationRuntimeGroupFiltering.scala | 4 ++- .../connector/MergeIntoTableSuiteBase.scala | 32 +++++++++++++++++++ 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelOperationRuntimeGroupFiltering.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelOperationRuntimeGroupFiltering.scala index 7360349284ec1..479e9065c0712 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelOperationRuntimeGroupFiltering.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/RowLevelOperationRuntimeGroupFiltering.scala @@ -50,7 +50,8 @@ class RowLevelOperationRuntimeGroupFiltering(optimizeSubqueries: Rule[LogicalPla // apply special dynamic filtering only for group-based row-level operations case GroupBasedRowLevelOperation(replaceData, _, Some(cond), DataSourceV2ScanRelation(_, scan: SupportsRuntimeV2Filtering, _, _, _)) - if conf.runtimeRowLevelOperationGroupFilterEnabled && cond != TrueLiteral => + if conf.runtimeRowLevelOperationGroupFilterEnabled && cond != TrueLiteral + && scan.filterAttributes().nonEmpty => // use reference equality on scan to find required scan relations val newQuery = replaceData.query transformUp { @@ -115,6 +116,7 @@ class RowLevelOperationRuntimeGroupFiltering(optimizeSubqueries: Rule[LogicalPla matchingRowsPlan: LogicalPlan, buildKeys: Seq[Attribute], pruningKeys: Seq[Attribute]): Expression = { + assert(buildKeys.nonEmpty && pruningKeys.nonEmpty) val buildQuery = Aggregate(buildKeys, buildKeys, matchingRowsPlan) DynamicPruningExpression( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala index e7555c23fa4fc..5668e5981910c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/MergeIntoTableSuiteBase.scala @@ -32,6 +32,38 @@ abstract class MergeIntoTableSuiteBase extends RowLevelOperationSuiteBase { import testImplicits._ + test("SPARK-45974: merge into non filter attributes table") { + val tableName: String = "cat.ns1.non_partitioned_table" + withTable(tableName) { + withTempView("source") { + val sourceRows = Seq( + (1, 100, "hr"), + (2, 200, "finance"), + (3, 300, "hr")) + sourceRows.toDF("pk", "salary", "dep").createOrReplaceTempView("source") + + sql(s"CREATE TABLE $tableName (pk INT NOT NULL, salary INT, dep STRING)".stripMargin) + + val df = sql( + s"""MERGE INTO $tableName t + |USING (select * from source) s + |ON t.pk = s.pk + |WHEN MATCHED THEN + | UPDATE SET t.salary = s.salary + |WHEN NOT MATCHED THEN + | INSERT * + |""".stripMargin) + + checkAnswer( + sql(s"SELECT * FROM $tableName"), + Seq( + Row(1, 100, "hr"), // insert + Row(2, 200, "finance"), // insert + Row(3, 300, "hr"))) // insert + } + } + } + test("merge into empty table with NOT MATCHED clause") { withTempView("source") { createTable("pk INT NOT NULL, salary INT, dep STRING") From 92b6619d3ffe3531ac7b11363bf68ad4a6cc8f1e Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Tue, 28 Nov 2023 11:04:14 +0800 Subject: [PATCH 130/521] [SPARK-46006][YARN][FOLLOWUP] YarnAllocator set target executor number to 0 to cancel 
pending allocate request when driver stop ### What changes were proposed in this pull request? YarnAllocator set target executor number to 0 to cancel pending allocate request when driver stop Now for this issue we do: 1. AllocationFailure should not be treated as exitCausedByApp when driver is shutting down https://github.com/apache/spark/pull/38622 2. Avoid new allocation requests when sc.stop stuck https://github.com/apache/spark/pull/43906 3. Cancel pending allocation request, this pr https://github.com/apache/spark/pull/44036 ### Why are the changes needed? Avoid unnecessary allocate request ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? MT ### Was this patch authored or co-authored using generative AI tooling? No Closes #44036 from AngersZhuuuu/SPARK-46006-FOLLOWUP. Authored-by: Angerszhuuuu Signed-off-by: Kent Yao (cherry picked from commit dbc8756bdac823be42ed10bc011415f405905497) Signed-off-by: Kent Yao --- .../scala/org/apache/spark/deploy/yarn/YarnAllocator.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index f14fc9d5de461..5fccc8c9ff47c 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -385,7 +385,10 @@ private[yarn] class YarnAllocator( this.hostToLocalTaskCountPerResourceProfileId = hostToLocalTaskCountPerResourceProfileId if (resourceProfileToTotalExecs.isEmpty) { - targetNumExecutorsPerResourceProfileId.clear() + // Set target executor number to 0 to cancel pending allocate request. + targetNumExecutorsPerResourceProfileId.keys.foreach { rp => + targetNumExecutorsPerResourceProfileId(rp) = 0 + } allocatorNodeHealthTracker.setSchedulerExcludedNodes(excludedNodes) true } else { From 35ecb32e479a33a1454709d133c48295d6774f3b Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Wed, 29 Nov 2023 01:37:35 +0100 Subject: [PATCH 131/521] [SPARK-46029][SQL] Escape the single quote, `_` and `%` for DS V2 pushdown ### What changes were proposed in this pull request? Spark supports push down `startsWith`, `endWith` and `contains` to JDBC database with DS V2 pushdown. But the `V2ExpressionSQLBuilder` didn't escape the single quote, `_` and `%`, it can cause unexpected result. ### Why are the changes needed? Escape the single quote, `_` and `%` for DS V2 pushdown ### Does this PR introduce _any_ user-facing change? 'No'. ### How was this patch tested? Exists test cases. ### Was this patch authored or co-authored using generative AI tooling? 'No'. Closes #43801 from beliefer/SPARK-38432_followup3. 
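To make the new pushdown behavior concrete, here is a hedged Python sketch of the escaping rule the patch adds in `V2ExpressionSQLBuilder` (the actual implementation is the Java/Scala code in the diff below; this is only an illustration): `_`, `%`, and the single quote are prefixed with a backslash, and the generated LIKE predicate carries `ESCAPE '\'`.

```python
def escape_for_like_pattern(value: str) -> str:
    """Illustrative mirror of escapeSpecialCharsForLikePattern, not the real code."""
    escaped = []
    for ch in value:
        if ch in ("_", "%", "'"):
            # '_' and '%' are LIKE wildcards; a quote would terminate the SQL literal.
            escaped.append("\\" + ch)
        else:
            escaped.append(ch)
    return "".join(escaped)

# startsWith("abc_") is now pushed down as:  EMAIL LIKE 'abc\_%' ESCAPE '\'
assert escape_for_like_pattern("abc_") == r"abc\_"
assert escape_for_like_pattern("50%") == r"50\%"
assert escape_for_like_pattern("it's") == r"it\'s"
```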
Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit d2cd98bdd32446b4106e66eb099efd8fb47acf40) Signed-off-by: Wenchen Fan --- .../util/V2ExpressionSQLBuilder.java | 35 +++++- .../org/apache/spark/sql/jdbc/H2Dialect.scala | 8 ++ .../datasources/v2/V2PredicateSuite.scala | 6 +- .../apache/spark/sql/jdbc/JDBCV2Suite.scala | 113 +++++++++++++++++- 4 files changed, 151 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java index 9ca0fe4787f10..dcb3c706946c5 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/connector/util/V2ExpressionSQLBuilder.java @@ -48,6 +48,35 @@ */ public class V2ExpressionSQLBuilder { + /** + * Escape the special chars for like pattern. + * + * Note: This method adopts the escape representation within Spark and is not bound to any JDBC + * dialect. JDBC dialect should overwrite this API if the underlying database have more special + * chars other than _ and %. + */ + protected String escapeSpecialCharsForLikePattern(String str) { + StringBuilder builder = new StringBuilder(); + + for (char c : str.toCharArray()) { + switch (c) { + case '_': + builder.append("\\_"); + break; + case '%': + builder.append("\\%"); + break; + case '\'': + builder.append("\\\'"); + break; + default: + builder.append(c); + } + } + + return builder.toString(); + } + public String build(Expression expr) { if (expr instanceof Literal) { return visitLiteral((Literal) expr); @@ -247,21 +276,21 @@ protected String visitStartsWith(String l, String r) { // Remove quotes at the beginning and end. // e.g. converts "'str'" to "str". String value = r.substring(1, r.length() - 1); - return l + " LIKE '" + value + "%'"; + return l + " LIKE '" + escapeSpecialCharsForLikePattern(value) + "%' ESCAPE '\\'"; } protected String visitEndsWith(String l, String r) { // Remove quotes at the beginning and end. // e.g. converts "'str'" to "str". String value = r.substring(1, r.length() - 1); - return l + " LIKE '%" + value + "'"; + return l + " LIKE '%" + escapeSpecialCharsForLikePattern(value) + "' ESCAPE '\\'"; } protected String visitContains(String l, String r) { // Remove quotes at the beginning and end. // e.g. converts "'str'" to "str". 
String value = r.substring(1, r.length() - 1); - return l + " LIKE '%" + value + "%'"; + return l + " LIKE '%" + escapeSpecialCharsForLikePattern(value) + "%' ESCAPE '\\'"; } private String inputToSQL(Expression input) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala index c246b50f4e156..8471a49153ff4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -240,6 +240,14 @@ private[sql] object H2Dialect extends JdbcDialect { } class H2SQLBuilder extends JDBCSQLBuilder { + override def escapeSpecialCharsForLikePattern(str: String): String = { + str.map { + case '_' => "\\_" + case '%' => "\\%" + case c => c.toString + }.mkString + } + override def visitAggregateFunction( funcName: String, isDistinct: Boolean, inputs: Array[String]): String = if (isDistinct && distinctUnsupportedAggregateFunctions.contains(funcName)) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala index a5fee51dc916f..4a8a231cc54ca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/V2PredicateSuite.scala @@ -315,7 +315,7 @@ class V2PredicateSuite extends SparkFunSuite { Array[Expression](ref("a"), literal)) assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) - assert(predicate1.describe.equals("a LIKE 'str%'")) + assert(predicate1.describe.equals(raw"a LIKE 'str%' ESCAPE '\'")) val v1Filter = StringStartsWith("a", "str") assert(v1Filter.toV2.equals(predicate1)) @@ -332,7 +332,7 @@ class V2PredicateSuite extends SparkFunSuite { Array[Expression](ref("a"), literal)) assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) - assert(predicate1.describe.equals("a LIKE '%str'")) + assert(predicate1.describe.equals(raw"a LIKE '%str' ESCAPE '\'")) val v1Filter = StringEndsWith("a", "str") assert(v1Filter.toV2.equals(predicate1)) @@ -349,7 +349,7 @@ class V2PredicateSuite extends SparkFunSuite { Array[Expression](ref("a"), literal)) assert(predicate1.equals(predicate2)) assert(predicate1.references.map(_.describe()).toSeq == Seq("a")) - assert(predicate1.describe.equals("a LIKE '%str%'")) + assert(predicate1.describe.equals(raw"a LIKE '%str%' ESCAPE '\'")) val v1Filter = StringContains("a", "str") assert(v1Filter.toV2.equals(predicate1)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala index ae0cfe17b11f5..51a15881088b5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala @@ -185,6 +185,19 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel conn.prepareStatement("INSERT INTO \"test\".\"datetime\" VALUES " + "('alex', '2022-05-18', '2022-05-18 00:00:00')").executeUpdate() + conn.prepareStatement( + "CREATE TABLE \"test\".\"address\" (email TEXT(32) NOT NULL)").executeUpdate() + conn.prepareStatement("INSERT INTO \"test\".\"address\" VALUES " + + "('abc_def@gmail.com')").executeUpdate() + conn.prepareStatement("INSERT INTO 
\"test\".\"address\" VALUES " + + "('abc%def@gmail.com')").executeUpdate() + conn.prepareStatement("INSERT INTO \"test\".\"address\" VALUES " + + "('abc%_def@gmail.com')").executeUpdate() + conn.prepareStatement("INSERT INTO \"test\".\"address\" VALUES " + + "('abc_%def@gmail.com')").executeUpdate() + conn.prepareStatement("INSERT INTO \"test\".\"address\" VALUES " + + "('abc_''%def@gmail.com')").executeUpdate() + conn.prepareStatement("CREATE TABLE \"test\".\"binary1\" (name TEXT(32),b BINARY(20))") .executeUpdate() val stmt = conn.prepareStatement("INSERT INTO \"test\".\"binary1\" VALUES (?, ?)") @@ -1096,7 +1109,7 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel val df3 = spark.table("h2.test.employee").filter($"name".startsWith("a")) checkFiltersRemoved(df3) - checkPushedInfo(df3, "PushedFilters: [NAME IS NOT NULL, NAME LIKE 'a%']") + checkPushedInfo(df3, raw"PushedFilters: [NAME IS NOT NULL, NAME LIKE 'a%' ESCAPE '\']") checkAnswer(df3, Seq(Row(1, "amy", 10000, 1000, true), Row(2, "alex", 12000, 1200, false))) val df4 = spark.table("h2.test.employee").filter($"is_manager") @@ -1240,6 +1253,94 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkAnswer(df17, Seq(Row(6, "jen", 12000, 1200, true))) } + test("SPARK-38432: escape the single quote, _ and % for DS V2 pushdown") { + val df1 = spark.table("h2.test.address").filter($"email".startsWith("abc_")) + checkFiltersRemoved(df1) + checkPushedInfo(df1, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\_%' ESCAPE '\']") + checkAnswer(df1, + Seq(Row("abc_%def@gmail.com"), Row("abc_'%def@gmail.com"), Row("abc_def@gmail.com"))) + + val df2 = spark.table("h2.test.address").filter($"email".startsWith("abc%")) + checkFiltersRemoved(df2) + checkPushedInfo(df2, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\%%' ESCAPE '\']") + checkAnswer(df2, Seq(Row("abc%_def@gmail.com"), Row("abc%def@gmail.com"))) + + val df3 = spark.table("h2.test.address").filter($"email".startsWith("abc%_")) + checkFiltersRemoved(df3) + checkPushedInfo(df3, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\%\_%' ESCAPE '\']") + checkAnswer(df3, Seq(Row("abc%_def@gmail.com"))) + + val df4 = spark.table("h2.test.address").filter($"email".startsWith("abc_%")) + checkFiltersRemoved(df4) + checkPushedInfo(df4, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\_\%%' ESCAPE '\']") + checkAnswer(df4, Seq(Row("abc_%def@gmail.com"))) + + val df5 = spark.table("h2.test.address").filter($"email".startsWith("abc_'%")) + checkFiltersRemoved(df5) + checkPushedInfo(df5, + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\_\'\%%' ESCAPE '\']") + checkAnswer(df5, Seq(Row("abc_'%def@gmail.com"))) + + val df6 = spark.table("h2.test.address").filter($"email".endsWith("_def@gmail.com")) + checkFiltersRemoved(df6) + checkPushedInfo(df6, + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\_def@gmail.com' ESCAPE '\']") + checkAnswer(df6, Seq(Row("abc%_def@gmail.com"), Row("abc_def@gmail.com"))) + + val df7 = spark.table("h2.test.address").filter($"email".endsWith("%def@gmail.com")) + checkFiltersRemoved(df7) + checkPushedInfo(df7, + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\%def@gmail.com' ESCAPE '\']") + checkAnswer(df7, + Seq(Row("abc%def@gmail.com"), Row("abc_%def@gmail.com"), Row("abc_'%def@gmail.com"))) + + val df8 = spark.table("h2.test.address").filter($"email".endsWith("%_def@gmail.com")) + checkFiltersRemoved(df8) + checkPushedInfo(df8, + raw"PushedFilters: [EMAIL IS NOT NULL, 
EMAIL LIKE '%\%\_def@gmail.com' ESCAPE '\']") + checkAnswer(df8, Seq(Row("abc%_def@gmail.com"))) + + val df9 = spark.table("h2.test.address").filter($"email".endsWith("_%def@gmail.com")) + checkFiltersRemoved(df9) + checkPushedInfo(df9, + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\_\%def@gmail.com' ESCAPE '\']") + checkAnswer(df9, Seq(Row("abc_%def@gmail.com"))) + + val df10 = spark.table("h2.test.address").filter($"email".endsWith("_'%def@gmail.com")) + checkFiltersRemoved(df10) + checkPushedInfo(df10, + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\_\'\%def@gmail.com' ESCAPE '\']") + checkAnswer(df10, Seq(Row("abc_'%def@gmail.com"))) + + val df11 = spark.table("h2.test.address").filter($"email".contains("c_d")) + checkFiltersRemoved(df11) + checkPushedInfo(df11, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_d%' ESCAPE '\']") + checkAnswer(df11, Seq(Row("abc_def@gmail.com"))) + + val df12 = spark.table("h2.test.address").filter($"email".contains("c%d")) + checkFiltersRemoved(df12) + checkPushedInfo(df12, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\%d%' ESCAPE '\']") + checkAnswer(df12, Seq(Row("abc%def@gmail.com"))) + + val df13 = spark.table("h2.test.address").filter($"email".contains("c%_d")) + checkFiltersRemoved(df13) + checkPushedInfo(df13, + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\%\_d%' ESCAPE '\']") + checkAnswer(df13, Seq(Row("abc%_def@gmail.com"))) + + val df14 = spark.table("h2.test.address").filter($"email".contains("c_%d")) + checkFiltersRemoved(df14) + checkPushedInfo(df14, + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_\%d%' ESCAPE '\']") + checkAnswer(df14, Seq(Row("abc_%def@gmail.com"))) + + val df15 = spark.table("h2.test.address").filter($"email".contains("c_'%d")) + checkFiltersRemoved(df15) + checkPushedInfo(df15, + raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_\'\%d%' ESCAPE '\']") + checkAnswer(df15, Seq(Row("abc_'%def@gmail.com"))) + } + test("scan with filter push-down with ansi mode") { Seq(false, true).foreach { ansiMode => withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiMode.toString) { @@ -1325,10 +1426,11 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel checkFiltersRemoved(df6, ansiMode) val expectedPlanFragment6 = if (ansiMode) { "PushedFilters: [BONUS IS NOT NULL, DEPT IS NOT NULL, " + - "CAST(BONUS AS string) LIKE '%30%', CAST(DEPT AS byte) > 1, " + + raw"CAST(BONUS AS string) LIKE '%30%' ESCAPE '\', CAST(DEPT AS byte) > 1, " + "CAST(DEPT AS short) > 1, CAST(BONUS AS decimal(20,2)) > 1200.00]" } else { - "PushedFilters: [BONUS IS NOT NULL, DEPT IS NOT NULL, CAST(BONUS AS string) LIKE '%30%']" + "PushedFilters: [BONUS IS NOT NULL, " + + raw"DEPT IS NOT NULL, CAST(BONUS AS string) LIKE '%30%' ESCAPE '\']" } checkPushedInfo(df6, expectedPlanFragment6) checkAnswer(df6, Seq(Row(2, "david", 10000, 1300, true))) @@ -1538,8 +1640,9 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel test("show tables") { checkAnswer(sql("SHOW TABLES IN h2.test"), - Seq(Row("test", "people", false), Row("test", "empty_table", false), - Row("test", "employee", false), Row("test", "item", false), Row("test", "dept", false), + Seq(Row("test", "address", false), Row("test", "people", false), + Row("test", "empty_table", false), Row("test", "employee", false), + Row("test", "item", false), Row("test", "dept", false), Row("test", "person", false), Row("test", "view1", false), Row("test", "view2", false), Row("test", "datetime", false), Row("test", "binary1", 
false))) } From c2342ba0e4ded541916eeb3478a1db2c129d3130 Mon Sep 17 00:00:00 2001 From: wforget <643348094@qq.com> Date: Thu, 30 Nov 2023 11:07:47 +0900 Subject: [PATCH 132/521] [SPARK-45943][SQL] Move DetermineTableStats to resolution rules ### What changes were proposed in this pull request? Move DetermineTableStats to resolution rules. ### Why are the changes needed? `MergeIntoTable#sourceTable` is used for `ReplaceData#groupFilterCondition` in `RewriteMergeIntoTable`, SourceTable in `ReplaceData#groupFilterCondition` is resolved and will not be applied to `DetermineTableStats` through `ResolveSubquery#resolveSubQueries`. So, when there is a hive table without stats in `MergeIntoTable#sourceTable`, IllegalStateException will occur. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? added test case ### Was this patch authored or co-authored using generative AI tooling? No Closes #43867 from wForget/SPARK-45943. Authored-by: wforget <643348094@qq.com> Signed-off-by: Hyukjin Kwon (cherry picked from commit d1aea92daf254334bcbd6d96901a54a2502eda29) Signed-off-by: Hyukjin Kwon --- .../sql/hive/HiveSessionStateBuilder.scala | 2 +- .../HiveSourceRowLevelOperationSuite.scala | 72 +++++++++++++++++++ 2 files changed, 73 insertions(+), 1 deletion(-) create mode 100644 sql/hive/src/test/scala/org/apache/spark/sql/hive/connector/HiveSourceRowLevelOperationSuite.scala diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala index 2d0bcdff07151..08e02c90ebd63 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala @@ -92,11 +92,11 @@ class HiveSessionStateBuilder( new ResolveSessionCatalog(catalogManager) +: ResolveWriteToStream +: new EvalSubqueriesForTimeTravel +: + new DetermineTableStats(session) +: customResolutionRules override val postHocResolutionRules: Seq[Rule[LogicalPlan]] = DetectAmbiguousSelfJoin +: - new DetermineTableStats(session) +: RelationConversions(catalog) +: QualifyLocationWithWarehouse(catalog) +: PreprocessTableCreation(catalog) +: diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/connector/HiveSourceRowLevelOperationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/connector/HiveSourceRowLevelOperationSuite.scala new file mode 100644 index 0000000000000..344fdc21fe2cf --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/connector/HiveSourceRowLevelOperationSuite.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.hive.connector + +import org.scalatest.BeforeAndAfter + +import org.apache.spark.sql.QueryTest +import org.apache.spark.sql.connector.catalog.InMemoryRowLevelOperationTableCatalog +import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.test.SQLTestUtils + +class HiveSourceRowLevelOperationSuite extends QueryTest with TestHiveSingleton + with BeforeAndAfter with SQLTestUtils { + + before { + spark.conf.set("spark.sql.catalog.cat", classOf[InMemoryRowLevelOperationTableCatalog].getName) + } + + after { + spark.sessionState.catalogManager.reset() + spark.sessionState.conf.unsetConf("spark.sql.catalog.cat") + } + + test("SPARK-45943: merge into using hive table without stats") { + val inMemCatNs = "cat.ns1" + val inMemCatTable = "in_mem_cat_table" + withTable("hive_table", s"$inMemCatNs.$inMemCatTable") { + // create hive table without stats + sql("create table hive_table(pk int, salary int, dep string)") + + sql( + s""" + |create table $inMemCatNs.$inMemCatTable ( + | pk INT NOT NULL, + | salary INT, + | dep STRING) + |PARTITIONED BY (dep) + | """.stripMargin) + + try { + // three-part naming is not supported in + // org.apache.spark.sql.hive.test.TestHiveQueryExecution.analyzed.{referencedTables} + sql(s"use $inMemCatNs") + sql( + s"""MERGE INTO $inMemCatTable t + |USING (SELECT pk, salary, dep FROM spark_catalog.default.hive_table) s + |ON t.pk = s.pk + |WHEN MATCHED THEN + | UPDATE SET t.salary = s.salary + |WHEN NOT MATCHED THEN + | INSERT * + |""".stripMargin) + } finally { + sql("use spark_catalog.default") + } + } + } +} From 00bb4ad46e373311a6303952f3944680b08e03d7 Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 30 Nov 2023 14:56:48 -0800 Subject: [PATCH 133/521] [SPARK-46188][DOC][3.5] Fix the CSS of Spark doc's generated tables ### What changes were proposed in this pull request? After https://github.com/apache/spark/pull/40269, there is no border in the generated tables of Spark doc(for example, https://spark.apache.org/docs/latest/sql-ref-ansi-compliance.html) . This PR is to fix it by restoring part of the table style in https://github.com/apache/spark/pull/40269/files#diff-309b964023ca899c9505205f36d3f4d5b36a6487e5c9b2e242204ee06bbc9ce9L26 This PR also unifies all the styles of tables by removing the `class="table table-striped"` in HTML style tables in markdown docs. ### Why are the changes needed? Fix a regression in the table CSS of Spark docs ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually build docs and verify. Before changes: image After changes: image ### Was this patch authored or co-authored using generative AI tooling? Generated-by: ChatGPT 4 Closes #44097 from gengliangwang/fixTable3.5. 
Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang --- docs/building-spark.md | 2 +- docs/cluster-overview.md | 2 +- docs/configuration.md | 40 +++++++++---------- docs/css/custom.css | 13 ++++++ docs/ml-classification-regression.md | 14 +++---- docs/ml-clustering.md | 8 ++-- docs/mllib-classification-regression.md | 2 +- docs/mllib-decision-tree.md | 2 +- docs/mllib-ensembles.md | 2 +- docs/mllib-evaluation-metrics.md | 10 ++--- docs/mllib-linear-methods.md | 4 +- docs/mllib-pmml-model-export.md | 2 +- docs/monitoring.md | 10 ++--- docs/rdd-programming-guide.md | 8 ++-- docs/running-on-kubernetes.md | 8 ++-- docs/running-on-mesos.md | 2 +- docs/running-on-yarn.md | 8 ++-- docs/security.md | 26 ++++++------ docs/spark-standalone.md | 12 +++--- docs/sparkr.md | 6 +-- docs/sql-data-sources-avro.md | 12 +++--- docs/sql-data-sources-csv.md | 2 +- docs/sql-data-sources-hive-tables.md | 4 +- docs/sql-data-sources-jdbc.md | 2 +- docs/sql-data-sources-json.md | 2 +- docs/sql-data-sources-load-save-functions.md | 2 +- docs/sql-data-sources-orc.md | 4 +- docs/sql-data-sources-parquet.md | 4 +- docs/sql-data-sources-text.md | 2 +- ...ql-distributed-sql-engine-spark-sql-cli.md | 4 +- docs/sql-error-conditions-sqlstates.md | 26 ++++++------ docs/sql-migration-guide.md | 4 +- docs/sql-performance-tuning.md | 16 ++++---- docs/storage-openstack-swift.md | 2 +- docs/streaming-custom-receivers.md | 2 +- docs/streaming-programming-guide.md | 10 ++--- .../structured-streaming-kafka-integration.md | 20 +++++----- .../structured-streaming-programming-guide.md | 12 +++--- docs/submitting-applications.md | 2 +- docs/web-ui.md | 2 +- 40 files changed, 164 insertions(+), 151 deletions(-) diff --git a/docs/building-spark.md b/docs/building-spark.md index 4b8e70655d59c..33d253a49dbf3 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -286,7 +286,7 @@ If use an individual repository or a repository on GitHub Enterprise, export bel ### Related environment variables - +
diff --git a/docs/cluster-overview.md b/docs/cluster-overview.md index 7da06a852089e..34913bd97a418 100644 --- a/docs/cluster-overview.md +++ b/docs/cluster-overview.md @@ -91,7 +91,7 @@ The [job scheduling overview](job-scheduling.html) describes this in more detail The following table summarizes terms you'll see used to refer to cluster concepts: -
Variable NameDefaultMeaning
SPARK_PROJECT_URL
+
diff --git a/docs/configuration.md b/docs/configuration.md index 4604360dda287..248f9333c9a3b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -135,7 +135,7 @@ of the most common options to set are: ### Application Properties -
TermMeaning
+
@@ -520,7 +520,7 @@ Apart from these, the following properties are also available, and may be useful ### Runtime Environment -
Property NameDefaultMeaningSince Version
spark.app.name
+
@@ -907,7 +907,7 @@ Apart from these, the following properties are also available, and may be useful ### Shuffle Behavior -
Property NameDefaultMeaningSince Version
spark.driver.extraClassPath
+
@@ -1282,7 +1282,7 @@ Apart from these, the following properties are also available, and may be useful ### Spark UI -
Property NameDefaultMeaningSince Version
spark.reducer.maxSizeInFlight
+
@@ -1674,7 +1674,7 @@ Apart from these, the following properties are also available, and may be useful ### Compression and Serialization -
Property NameDefaultMeaningSince Version
spark.eventLog.logBlockUpdates.enabled
+
@@ -1872,7 +1872,7 @@ Apart from these, the following properties are also available, and may be useful ### Memory Management -
Property NameDefaultMeaningSince Version
spark.broadcast.compress
+
@@ -1997,7 +1997,7 @@ Apart from these, the following properties are also available, and may be useful ### Execution Behavior -
Property NameDefaultMeaningSince Version
spark.memory.fraction
+
@@ -2247,7 +2247,7 @@ Apart from these, the following properties are also available, and may be useful ### Executor Metrics -
Property NameDefaultMeaningSince Version
spark.broadcast.blockSize
+
@@ -2315,7 +2315,7 @@ Apart from these, the following properties are also available, and may be useful ### Networking -
Property NameDefaultMeaningSince Version
spark.eventLog.logStageExecutorMetrics
+
@@ -2478,7 +2478,7 @@ Apart from these, the following properties are also available, and may be useful ### Scheduling -
Property NameDefaultMeaningSince Version
spark.rpc.message.maxSize
+
@@ -2962,7 +2962,7 @@ Apart from these, the following properties are also available, and may be useful ### Barrier Execution Mode -
Property NameDefaultMeaningSince Version
spark.cores.max
+
@@ -3009,7 +3009,7 @@ Apart from these, the following properties are also available, and may be useful ### Dynamic Allocation -
Property NameDefaultMeaningSince Version
spark.barrier.sync.timeout
+
@@ -3151,7 +3151,7 @@ finer granularity starting from driver and executor. Take RPC module as example like shuffle, just replace "rpc" with "shuffle" in the property names except spark.{driver|executor}.rpc.netty.dispatcher.numThreads, which is only for RPC module. -
Property NameDefaultMeaningSince Version
spark.dynamicAllocation.enabled
+
@@ -3294,7 +3294,7 @@ External users can query the static sql config values via `SparkSession.conf` or ### Spark Streaming -
Property NameDefaultMeaningSince Version
spark.{driver|executor}.rpc.io.serverThreads
+
@@ -3426,7 +3426,7 @@ External users can query the static sql config values via `SparkSession.conf` or ### SparkR -
Property NameDefaultMeaningSince Version
spark.streaming.backpressure.enabled
+
@@ -3482,7 +3482,7 @@ External users can query the static sql config values via `SparkSession.conf` or ### GraphX -
Property NameDefaultMeaningSince Version
spark.r.numRBackendThreads
+
@@ -3497,7 +3497,7 @@ External users can query the static sql config values via `SparkSession.conf` or ### Deploy -
Property NameDefaultMeaningSince Version
spark.graphx.pregel.checkpointInterval
+
@@ -3547,7 +3547,7 @@ copy `conf/spark-env.sh.template` to create it. Make sure you make the copy exec The following variables can be set in `spark-env.sh`: -
Property NameDefaultMeaningSince Version
spark.deploy.recoveryMode
+
@@ -3684,7 +3684,7 @@ Push-based shuffle helps improve the reliability and performance of spark shuffl ### External Shuffle service(server) side configuration options -
Environment VariableMeaning
JAVA_HOME
+
@@ -3718,7 +3718,7 @@ Push-based shuffle helps improve the reliability and performance of spark shuffl ### Client side configuration options -
Property NameDefaultMeaningSince Version
spark.shuffle.push.server.mergedShuffleFileManagerImpl
+
diff --git a/docs/css/custom.css b/docs/css/custom.css index 4576f45d1ab7d..e7416d9ded618 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -1110,5 +1110,18 @@ img { table { width: 100%; overflow-wrap: normal; + border-collapse: collapse; /* Ensures that the borders collapse into a single border */ } +table th, table td { + border: 1px solid #cccccc; /* Adds a border to each table header and data cell */ + padding: 6px 13px; /* Optional: Adds padding inside each cell for better readability */ +} + +table tr { + background-color: white; /* Sets a default background color for all rows */ +} + +table tr:nth-child(2n) { + background-color: #F1F4F5; /* Sets a different background color for even rows */ +} diff --git a/docs/ml-classification-regression.md b/docs/ml-classification-regression.md index d184f4fe0257c..604b3245272fc 100644 --- a/docs/ml-classification-regression.md +++ b/docs/ml-classification-regression.md @@ -703,7 +703,7 @@ others. ### Available families -
Property NameDefaultMeaningSince Version
spark.shuffle.push.enabled
+
@@ -1224,7 +1224,7 @@ All output columns are optional; to exclude an output column, set its correspond ### Input Columns -
Family
+
@@ -1251,7 +1251,7 @@ All output columns are optional; to exclude an output column, set its correspond ### Output Columns -
Param name
+
@@ -1326,7 +1326,7 @@ All output columns are optional; to exclude an output column, set its correspond #### Input Columns -
Param name
+
@@ -1353,7 +1353,7 @@ All output columns are optional; to exclude an output column, set its correspond #### Output Columns (Predictions) -
Param name
+
@@ -1407,7 +1407,7 @@ All output columns are optional; to exclude an output column, set its correspond #### Input Columns -
Param name
+
@@ -1436,7 +1436,7 @@ Note that `GBTClassifier` currently only supports binary labels. #### Output Columns (Predictions) -
Param name
+
diff --git a/docs/ml-clustering.md b/docs/ml-clustering.md index 00a156b6645ce..fdb8173ce3bbe 100644 --- a/docs/ml-clustering.md +++ b/docs/ml-clustering.md @@ -40,7 +40,7 @@ called [kmeans||](http://theory.stanford.edu/~sergei/papers/vldb12-kmpar.pdf). ### Input Columns -
Param name
+
@@ -61,7 +61,7 @@ called [kmeans||](http://theory.stanford.edu/~sergei/papers/vldb12-kmpar.pdf). ### Output Columns -
Param name
+
@@ -204,7 +204,7 @@ model. ### Input Columns -
Param name
+
@@ -225,7 +225,7 @@ model. ### Output Columns -
Param name
+
diff --git a/docs/mllib-classification-regression.md b/docs/mllib-classification-regression.md index 10cb85e392029..b3305314abc56 100644 --- a/docs/mllib-classification-regression.md +++ b/docs/mllib-classification-regression.md @@ -26,7 +26,7 @@ classification](http://en.wikipedia.org/wiki/Multiclass_classification), and [regression analysis](http://en.wikipedia.org/wiki/Regression_analysis). The table below outlines the supported algorithms for each type of problem. -
Param name
+
diff --git a/docs/mllib-decision-tree.md b/docs/mllib-decision-tree.md index 174255c48b699..0d9886315e288 100644 --- a/docs/mllib-decision-tree.md +++ b/docs/mllib-decision-tree.md @@ -51,7 +51,7 @@ The *node impurity* is a measure of the homogeneity of the labels at the node. T implementation provides two impurity measures for classification (Gini impurity and entropy) and one impurity measure for regression (variance). -
Problem TypeSupported Methods
+
diff --git a/docs/mllib-ensembles.md b/docs/mllib-ensembles.md index b1006f2730db5..fdad7ae68dd49 100644 --- a/docs/mllib-ensembles.md +++ b/docs/mllib-ensembles.md @@ -191,7 +191,7 @@ Note that each loss is applicable to one of classification or regression, not bo Notation: $N$ = number of instances. $y_i$ = label of instance $i$. $x_i$ = features of instance $i$. $F(x_i)$ = model's predicted label for instance $i$. -
ImpurityTaskFormulaDescription
+
diff --git a/docs/mllib-evaluation-metrics.md b/docs/mllib-evaluation-metrics.md index f82f6a01136b9..30acc3dc634be 100644 --- a/docs/mllib-evaluation-metrics.md +++ b/docs/mllib-evaluation-metrics.md @@ -76,7 +76,7 @@ plots (recall, false positive rate) points. **Available metrics** -
LossTaskFormulaDescription
+
@@ -179,7 +179,7 @@ For this section, a modified delta function $\hat{\delta}(x)$ will prove useful $$\hat{\delta}(x) = \begin{cases}1 & \text{if $x = 0$}, \\ 0 & \text{otherwise}.\end{cases}$$ -
MetricDefinition
+
@@ -296,7 +296,7 @@ The following definition of indicator function $I_A(x)$ on a set $A$ will be nec $$I_A(x) = \begin{cases}1 & \text{if $x \in A$}, \\ 0 & \text{otherwise}.\end{cases}$$ -
MetricDefinition
+
@@ -447,7 +447,7 @@ documents, returns a relevance score for the recommended document. $$rel_D(r) = \begin{cases}1 & \text{if $r \in D$}, \\ 0 & \text{otherwise}.\end{cases}$$ -
MetricDefinition
+
@@ -553,7 +553,7 @@ variable from a number of independent variables. **Available metrics** -
MetricDefinitionNotes
+
diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md index b535d2de307a9..448d881f794a5 100644 --- a/docs/mllib-linear-methods.md +++ b/docs/mllib-linear-methods.md @@ -72,7 +72,7 @@ training error) and minimizing model complexity (i.e., to avoid overfitting). The following table summarizes the loss functions and their gradients or sub-gradients for the methods `spark.mllib` supports: -
MetricDefinition
+
@@ -105,7 +105,7 @@ The purpose of the encourage simple models and avoid overfitting. We support the following regularizers in `spark.mllib`: -
loss function $L(\wv; \x, y)$gradient or sub-gradient
+
diff --git a/docs/mllib-pmml-model-export.md b/docs/mllib-pmml-model-export.md index e20d7c2fe4e17..02b5fda7a36df 100644 --- a/docs/mllib-pmml-model-export.md +++ b/docs/mllib-pmml-model-export.md @@ -28,7 +28,7 @@ license: | The table below outlines the `spark.mllib` models that can be exported to PMML and their equivalent PMML model. -
regularizer $R(\wv)$gradient or sub-gradient
+
diff --git a/docs/monitoring.md b/docs/monitoring.md index 91b158bf85d26..e90ef46bdffe0 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -69,7 +69,7 @@ The history server can be configured as follows: ### Environment Variables -
spark.mllib modelPMML model
+
@@ -145,7 +145,7 @@ Use it with caution. Security options for the Spark History Server are covered more detail in the [Security](security.html#web-ui) page. -
Environment VariableMeaning
SPARK_DAEMON_MEMORY
+
@@ -470,7 +470,7 @@ only for applications in cluster mode, not applications in client mode. Applicat can be identified by their `[attempt-id]`. In the API listed below, when running in YARN cluster mode, `[app-id]` will actually be `[base-app-id]/[attempt-id]`, where `[base-app-id]` is the YARN application ID. -
Property Name
+
@@ -669,7 +669,7 @@ The REST API exposes the values of the Task Metrics collected by Spark executors of task execution. The metrics can be used for performance troubleshooting and workload characterization. A list of the available metrics, with a short description: -
EndpointMeaning
/applications
+
@@ -827,7 +827,7 @@ In addition, aggregated per-stage peak values of the executor memory metrics are Executor memory metrics are also exposed via the Spark metrics system based on the [Dropwizard metrics library](https://metrics.dropwizard.io/4.2.0). A list of the available metrics, with a short description: -
Spark Executor Task Metric name
+
diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md index aee22ad484e60..cc897aea06c93 100644 --- a/docs/rdd-programming-guide.md +++ b/docs/rdd-programming-guide.md @@ -378,7 +378,7 @@ resulting Java objects using [pickle](https://github.com/irmen/pickle/). When sa PySpark does the reverse. It unpickles Python objects into Java objects and then converts them to Writables. The following Writables are automatically converted: -
Executor Level Metric name Short description
+
@@ -954,7 +954,7 @@ and pair RDD functions doc [Java](api/java/index.html?org/apache/spark/api/java/JavaPairRDD.html)) for details. -
Writable TypePython Type
Textstr
IntWritableint
+
@@ -1069,7 +1069,7 @@ and pair RDD functions doc [Java](api/java/index.html?org/apache/spark/api/java/JavaPairRDD.html)) for details. -
TransformationMeaning
map(func)
+
@@ -1214,7 +1214,7 @@ to `persist()`. The `cache()` method is a shorthand for using the default storag which is `StorageLevel.MEMORY_ONLY` (store deserialized objects in memory). The full set of storage levels is: -
ActionMeaning
reduce(func)
+
diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 38a745f1afca3..a684e7caa1a04 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -579,7 +579,7 @@ See the [configuration page](configuration.html) for information on Spark config #### Spark Properties -
Storage LevelMeaning
MEMORY_ONLY
+
@@ -1645,7 +1645,7 @@ See the below table for the full list of pod specifications that will be overwri ### Pod Metadata -
Property NameDefaultMeaningSince Version
spark.kubernetes.context
+
@@ -1681,7 +1681,7 @@ See the below table for the full list of pod specifications that will be overwri ### Pod Spec -
Pod metadata keyModified valueDescription
name
+
@@ -1734,7 +1734,7 @@ See the below table for the full list of pod specifications that will be overwri The following affect the driver and executor containers. All other containers in the pod spec will be unaffected. -
Pod spec keyModified valueDescription
imagePullSecrets
+
diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md index b1a54a089a542..3d1c57030982d 100644 --- a/docs/running-on-mesos.md +++ b/docs/running-on-mesos.md @@ -374,7 +374,7 @@ See the [configuration page](configuration.html) for information on Spark config #### Spark Properties -
Container spec keyModified valueDescription
env
+
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 97cc9ac135af1..d577b70a68039 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -143,7 +143,7 @@ To use a custom metrics.properties for the application master and executors, upd #### Spark Properties -
Property NameDefaultMeaningSince Version
spark.mesos.coarse
+
@@ -696,7 +696,7 @@ To use a custom metrics.properties for the application master and executors, upd #### Available patterns for SHS custom executor log URL -
Property NameDefaultMeaningSince Version
spark.yarn.am.memory
+
@@ -783,7 +783,7 @@ staging directory of the Spark application. ## YARN-specific Kerberos Configuration -
PatternMeaning
{{HTTP_SCHEME}}
+
@@ -882,7 +882,7 @@ to avoid garbage collection issues during shuffle. The following extra configuration options are available when the shuffle service is running on YARN: -
Property NameDefaultMeaningSince Version
spark.kerberos.keytab
+
diff --git a/docs/security.md b/docs/security.md index 3c6fd507fec6d..c5d132f680a41 100644 --- a/docs/security.md +++ b/docs/security.md @@ -60,7 +60,7 @@ distributing the shared secret. Each application will use a unique shared secret the case of YARN, this feature relies on YARN RPC encryption being enabled for the distribution of secrets to be secure. -
Property NameDefaultMeaning
spark.yarn.shuffle.stopOnFailure
+
@@ -82,7 +82,7 @@ that any user that can list pods in the namespace where the Spark application is also see their authentication secret. Access control rules should be properly set up by the Kubernetes admin to ensure that Spark authentication is secure. -
Property NameDefaultMeaningSince Version
spark.yarn.shuffle.server.recovery.disabled
+
@@ -103,7 +103,7 @@ Kubernetes admin to ensure that Spark authentication is secure. Alternatively, one can mount authentication secrets using files and Kubernetes secrets that the user mounts into their pods. -
Property NameDefaultMeaningSince Version
spark.authenticate
+
@@ -159,7 +159,7 @@ is still required when talking to shuffle services from Spark versions older tha The following table describes the different options available for configuring this feature. -
Property NameDefaultMeaningSince Version
spark.authenticate.secret.file
+
@@ -219,7 +219,7 @@ encrypting output data generated by applications with APIs such as `saveAsHadoop The following settings cover enabling encryption for data written to disk: -
Property NameDefaultMeaningSince Version
spark.network.crypto.enabled
+
@@ -287,7 +287,7 @@ below. The following options control the authentication of Web UIs: -
Property NameDefaultMeaningSince Version
spark.io.encryption.enabled
+
@@ -391,7 +391,7 @@ servlet filters. To enable authorization in the SHS, a few extra options are used: -
Property NameDefaultMeaningSince Version
spark.ui.allowFramingFrom
+
@@ -440,7 +440,7 @@ protocol-specific settings. This way the user can easily provide the common sett protocols without disabling the ability to configure each one individually. The following table describes the SSL configuration namespaces: -
Property NameDefaultMeaningSince Version
spark.history.ui.acls.enable
+
@@ -471,7 +471,7 @@ describes the SSL configuration namespaces: The full breakdown of available SSL options can be found below. The `${ns}` placeholder should be replaced with one of the above namespaces. -
Config Namespace
+
@@ -641,7 +641,7 @@ Apache Spark can be configured to include HTTP headers to aid in preventing Cros (XSS), Cross-Frame Scripting (XFS), MIME-Sniffing, and also to enforce HTTP Strict Transport Security. -
Property NameDefaultMeaning
${ns}.enabled
+
@@ -697,7 +697,7 @@ configure those ports. ## Standalone mode only -
Property NameDefaultMeaningSince Version
spark.ui.xXssProtection
+
FromToDefault PortPurposeConfiguration @@ -748,7 +748,7 @@ configure those ports. ## All cluster managers - +
FromToDefault PortPurposeConfiguration @@ -824,7 +824,7 @@ deployment-specific page for more information. The following options provides finer-grained control for this feature: - +
diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index e7ea2669a1139..5babac9e25295 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -53,7 +53,7 @@ You should see the new node listed there, along with its number of CPUs and memo Finally, the following configuration options can be passed to the master and worker: -
Property NameDefaultMeaningSince Version
spark.security.credentials.${service}.enabled
+
@@ -116,7 +116,7 @@ Note that these scripts must be executed on the machine you want to run the Spar You can optionally configure the cluster further by setting environment variables in `conf/spark-env.sh`. Create this file by starting with the `conf/spark-env.sh.template`, and _copy it to all your worker machines_ for the settings to take effect. The following settings are available: -
ArgumentMeaning
-h HOST, --host HOST
+
@@ -188,7 +188,7 @@ You can optionally configure the cluster further by setting environment variable SPARK_MASTER_OPTS supports the following system properties: -
Environment VariableMeaning
SPARK_MASTER_HOST
+
@@ -324,7 +324,7 @@ SPARK_MASTER_OPTS supports the following system properties: SPARK_WORKER_OPTS supports the following system properties: -
Property NameDefaultMeaningSince Version
spark.master.ui.port
+
@@ -429,7 +429,7 @@ You can also pass an option `--total-executor-cores ` to control the n Spark applications supports the following configuration properties specific to standalone mode: -
Property NameDefaultMeaningSince Version
spark.worker.cleanup.enabled
+
@@ -646,7 +646,7 @@ ZooKeeper is the best way to go for production-level high availability, but if y In order to enable this recovery mode, you can set SPARK_DAEMON_JAVA_OPTS in spark-env using this configuration: -
Property NameDefault ValueMeaningSince Version
spark.standalone.submit.waitAppCompletion
+
diff --git a/docs/sparkr.md b/docs/sparkr.md index 8e6a98e40b680..a34a1200c4c00 100644 --- a/docs/sparkr.md +++ b/docs/sparkr.md @@ -77,7 +77,7 @@ sparkR.session(master = "local[*]", sparkConfig = list(spark.driver.memory = "2g The following Spark driver properties can be set in `sparkConfig` with `sparkR.session` from RStudio: -
System propertyMeaningSince Version
spark.deploy.recoveryMode
+
@@ -588,7 +588,7 @@ The following example shows how to save/load a MLlib model by SparkR. {% include_example read_write r/ml/ml.R %} # Data type mapping between R and Spark -
Property NameProperty groupspark-submit equivalent
spark.master
+
@@ -728,7 +728,7 @@ function is masking another function. The following functions are masked by the SparkR package: -
RSpark
byte
+
diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index b01174b918245..c846116ebf3e3 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -233,7 +233,7 @@ Data source options of Avro can be set via: * the `.option` method on `DataFrameReader` or `DataFrameWriter`. * the `options` parameter in function `from_avro`. -
Masked functionHow to Access
cov in package:stats
+
@@ -331,7 +331,7 @@ Data source options of Avro can be set via: ## Configuration Configuration of Avro can be done using the `setConf` method on SparkSession or by running `SET key=value` commands using SQL. -
Property NameDefaultMeaningScopeSince Version
avroSchema
+
@@ -418,7 +418,7 @@ Submission Guide for more details. ## Supported types for Avro -> Spark SQL conversion Currently Spark supports reading all [primitive types](https://avro.apache.org/docs/1.11.2/specification/#primitive-types) and [complex types](https://avro.apache.org/docs/1.11.2/specification/#complex-types) under records of Avro. -
Property NameDefaultMeaningSince Version
spark.sql.legacy.replaceDatabricksSparkAvro.enabled
+
@@ -483,7 +483,7 @@ All other union types are considered complex. They will be mapped to StructType It also supports reading the following Avro [logical types](https://avro.apache.org/docs/1.11.2/specification/#logical-types): -
Avro typeSpark SQL type
boolean
+
[The remaining hunks of this patch did not survive extraction; each replaces a single line of table markup next to the property/type tables in the Avro data source page and in docs/sql-data-sources-csv.md, docs/sql-data-sources-hive-tables.md, docs/sql-data-sources-jdbc.md, docs/sql-data-sources-json.md, docs/sql-data-sources-load-save-functions.md, docs/sql-data-sources-orc.md, docs/sql-data-sources-parquet.md, docs/sql-data-sources-text.md, docs/sql-distributed-sql-engine-spark-sql-cli.md, docs/sql-error-conditions-sqlstates.md, and docs/sql-migration-guide.md.]
- - - - - - @@ -499,15 +491,6 @@ To use a custom metrics.properties for the application master and executors, upd - - - - - - From 9c83bf501ccefa7c6c0ba071f69e2528f3504854 Mon Sep 17 00:00:00 2001 From: Amy Tsai Date: Mon, 11 Dec 2023 18:35:31 +0300 Subject: [PATCH 145/521] [MINOR][DOCS] Fix documentation for `spark.sql.legacy.doLooseUpcast` in SQL migration guide ### What changes were proposed in this pull request? Fixes an error in the SQL migration guide documentation for `spark.sql.legacy.doLooseUpcast`. I corrected the config name and moved it to the section for migration from Spark 2.4 to 3.0 since it was not made available until Spark 3.0. ### Why are the changes needed? The config was documented as `spark.sql.legacy.looseUpcast` and is inaccurately included in the Spark 2.4 to Spark 2.4.1 section. I changed the docs to match what is implemented in https://github.com/apache/spark/blob/20df062d85e80422a55afae80ddbf2060f26516c/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala#L3873 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Docs only change ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44262 from amytsai-stripe/fix-migration-docs-loose-upcast. Authored-by: Amy Tsai Signed-off-by: Max Gekk (cherry picked from commit bab884082c0f82e3f9053adac6c7e8a3fcfab11c) Signed-off-by: Max Gekk --- docs/sql-migration-guide.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 88635ee3d1f44..2eba9500e907e 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -251,6 +251,8 @@ license: | - In Spark 3.0, the column metadata will always be propagated in the API `Column.name` and `Column.as`. In Spark version 2.4 and earlier, the metadata of `NamedExpression` is set as the `explicitMetadata` for the new column at the time the API is called, it won't change even if the underlying `NamedExpression` changes metadata. To restore the behavior before Spark 3.0, you can use the API `as(alias: String, metadata: Metadata)` with explicit metadata. + - When turning a Dataset to another Dataset, Spark will up cast the fields in the original Dataset to the type of corresponding fields in the target DataSet. In version 2.4 and earlier, this up cast is not very strict, e.g. `Seq("str").toDS.as[Int]` fails, but `Seq("str").toDS.as[Boolean]` works and throw NPE during execution. In Spark 3.0, the up cast is stricter and turning String into something else is not allowed, i.e. `Seq("str").toDS.as[Boolean]` will fail during analysis. To restore the behavior before Spark 3.0, set `spark.sql.legacy.doLooseUpcast` to `true`. + ### DDL Statements - In Spark 3.0, when inserting a value into a table column with a different data type, the type coercion is performed as per ANSI SQL standard. Certain unreasonable type conversions such as converting `string` to `int` and `double` to `boolean` are disallowed. A runtime exception is thrown if the value is out-of-range for the data type of the column. In Spark version 2.4 and below, type conversions during table insertion are allowed as long as they are valid `Cast`. When inserting an out-of-range value to an integral field, the low-order bits of the value is inserted(the same as Java/Scala numeric type casting). For example, if 257 is inserted to a field of byte type, the result is 1. 
The behavior is controlled by the option `spark.sql.storeAssignmentPolicy`, with a default value as "ANSI". Setting the option as "Legacy" restores the previous behavior. @@ -464,8 +466,6 @@ license: | need to specify a value with units like "30s" now, to avoid being interpreted as milliseconds; otherwise, the extremely short interval that results will likely cause applications to fail. - - When turning a Dataset to another Dataset, Spark will up cast the fields in the original Dataset to the type of corresponding fields in the target DataSet. In version 2.4 and earlier, this up cast is not very strict, e.g. `Seq("str").toDS.as[Int]` fails, but `Seq("str").toDS.as[Boolean]` works and throw NPE during execution. In Spark 3.0, the up cast is stricter and turning String into something else is not allowed, i.e. `Seq("str").toDS.as[Boolean]` will fail during analysis. To restore the behavior before 2.4.1, set `spark.sql.legacy.looseUpcast` to `true`. - ## Upgrading from Spark SQL 2.3 to 2.4 - In Spark version 2.3 and earlier, the second parameter to array_contains function is implicitly promoted to the element type of first array type parameter. This type promotion can be lossy and may cause `array_contains` function to return wrong result. This problem has been addressed in 2.4 by employing a safer type promotion mechanism. This can cause some change in behavior and are illustrated in the table below. From ac031d68a01f14cc73f05e83a790a6787aa6453d Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 11 Dec 2023 15:05:21 -0800 Subject: [PATCH 146/521] [SPARK-46369][CORE] Remove `kill` link from `RELAUNCHING` drivers in `MasterPage` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This PR aims to remove `kill` hyperlink from `RELAUNCHING` drivers in `MasterPage`. ### Why are the changes needed? Since Apache Spark 1.4.0 (SPARK-5495), `RELAUNCHING` drivers have `kill` hyperlinks in the `Completed Drivers` table. ![Screenshot 2023-12-11 at 1 02 29 PM](https://github.com/apache/spark/assets/9700541/38f4bf08-efb9-47e5-8a7a-f7d127429012) However, this is a bug because the driver was already terminated by definition. Newly relaunched driver has an independent ID and there is no relationship with the previously terminated ID. https://github.com/apache/spark/blob/7db85642600b1e3b39ca11e41d4e3e0bf1c8962b/core/src/main/scala/org/apache/spark/deploy/master/DriverState.scala#L27 If we clicked the `kill` link, `Master` always complains like the following. ``` 23/12/11 21:25:50 INFO Master: Asked to kill driver 202312112113-00000 23/12/11 21:25:50 WARN Master: Driver 202312112113-00000 has already finished or does not exist ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual review. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44301 from dongjoon-hyun/SPARK-46369. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit e434c9f0d5792b7af43c87dd6145fd8a6a04d8e2) Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/deploy/master/ui/MasterPage.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index a71eb33a2fe1d..e7e90aa0a37da 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -322,8 +322,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { private def driverRow(driver: DriverInfo, showDuration: Boolean): Seq[Node] = { val killLink = if (parent.killEnabled && (driver.state == DriverState.RUNNING || - driver.state == DriverState.SUBMITTED || - driver.state == DriverState.RELAUNCHING)) { + driver.state == DriverState.SUBMITTED)) { val confirm = s"if (window.confirm('Are you sure you want to kill driver ${driver.id} ?')) " + "{ this.parentNode.submit(); return true; } else { return false; }" From eb1e6ad13aab3960f1543b75bf3b75b3a7d62746 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 13 Dec 2023 18:04:38 +0800 Subject: [PATCH 147/521] [SPARK-46388][SQL] HiveAnalysis misses the pattern guard of `query.resolved` ### What changes were proposed in this pull request? This PR adds `query.resolved` as a pattern guard when HiveAnalysis converts InsertIntoStatement to InsertIntoHiveTable. ### Why are the changes needed? Due to https://github.com/apache/spark/pull/41262/files#diff-ed19f376a63eba52eea59ca71f3355d4495fad4fad4db9a3324aade0d4986a47R1080, the `table` field is resolved regardless of the query field. Before, it never got a chance to be resolved as `HiveTableRelation` and then match any rule of HiveAnalysis. But now, it gets the chance always and results in a spark-kernel bug - `Invalid call to toAttribute on unresolved object.` ``` insert into t2 select cast(a as short) from t where b=1; Invalid call to toAttribute on unresolved object ``` ### Does this PR introduce _any_ user-facing change? no, bugfix for 3.5 and later ### How was this patch tested? added new test ### Was this patch authored or co-authored using generative AI tooling? no Closes #44326 from yaooqinn/SPARK-46388. 
Authored-by: Kent Yao Signed-off-by: Kent Yao (cherry picked from commit ccc436d829cd0b07088e2864cb1ecc55ab97a491) Signed-off-by: Kent Yao --- .../spark/sql/hive/HiveStrategies.scala | 2 +- .../sql/hive/execution/SQLQuerySuite.scala | 26 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 3da3d4a0eb5c8..c53a6c378d457 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -161,7 +161,7 @@ object HiveAnalysis extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case InsertIntoStatement( r: HiveTableRelation, partSpec, _, query, overwrite, ifPartitionNotExists, _) - if DDLUtils.isHiveTable(r.tableMeta) => + if DDLUtils.isHiveTable(r.tableMeta) && query.resolved => InsertIntoHiveTable(r.tableMeta, partSpec, query, overwrite, ifPartitionNotExists, query.output.map(_.name)) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 9308d1eda146f..6160c3e5f6c65 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -2660,6 +2660,32 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi checkAnswer(df, Seq.empty[Row]) } } + + test("SPARK-46388: HiveAnalysis convert InsertIntoStatement to InsertIntoHiveTable " + + "iff child resolved") { + withTable("t") { + sql("CREATE TABLE t (a STRING)") + checkError( + exception = intercept[AnalysisException](sql("INSERT INTO t SELECT a*2 FROM t where b=1")), + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + sqlState = None, + parameters = Map("objectName" -> "`b`", "proposal" -> "`a`"), + context = ExpectedContext( + fragment = "b", + start = 38, + stop = 38) ) + checkError( + exception = intercept[AnalysisException]( + sql("INSERT INTO t SELECT cast(a as short) FROM t where b=1")), + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + sqlState = None, + parameters = Map("objectName" -> "`b`", "proposal" -> "`a`"), + context = ExpectedContext( + fragment = "b", + start = 51, + stop = 51)) + } + } } @SlowHiveTest From 908c472728f24034baf0b59f03b04ca148eabeca Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Thu, 14 Dec 2023 00:06:22 -0800 Subject: [PATCH 148/521] [SPARK-46396][SQL] Timestamp inference should not throw exception ### What changes were proposed in this pull request? When setting `spark.sql.legacy.timeParserPolicy=LEGACY`, Spark will use the LegacyFastTimestampFormatter to infer potential timestamp columns. The inference shouldn't throw exception. 
However, when the input is 23012150952, there is exception: ``` For input string: "23012150952" java.lang.NumberFormatException: For input string: "23012150952" at java.base/java.lang.NumberFormatException.forInputString(NumberFormatException.java:67) at java.base/java.lang.Integer.parseInt(Integer.java:668) at java.base/java.lang.Integer.parseInt(Integer.java:786) at org.apache.commons.lang3.time.FastDateParser$NumberStrategy.parse(FastDateParser.java:304) at org.apache.commons.lang3.time.FastDateParser.parse(FastDateParser.java:1045) at org.apache.commons.lang3.time.FastDateFormat.parse(FastDateFormat.java:651) at org.apache.spark.sql.catalyst.util.LegacyFastTimestampFormatter.parseOptional(TimestampFormatter.scala:418) ``` This PR is to fix the issue. ### Why are the changes needed? Bug fix, Timestamp inference should not throw exception ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? New test case + existing tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #44338 from gengliangwang/fixParseOptional. Authored-by: Gengliang Wang Signed-off-by: Gengliang Wang (cherry picked from commit 4a79ae9d821e9b04fbe949251050c3e4819dff92) Signed-off-by: Gengliang Wang --- .../spark/sql/catalyst/util/TimestampFormatter.scala | 12 ++++++++---- .../sql/catalyst/util/TimestampFormatterSuite.scala | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala index 55eee41c14ca5..0866cee9334c5 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala @@ -414,10 +414,14 @@ class LegacyFastTimestampFormatter( override def parseOptional(s: String): Option[Long] = { cal.clear() // Clear the calendar because it can be re-used many times - if (fastDateFormat.parse(s, new ParsePosition(0), cal)) { - Some(extractMicros(cal)) - } else { - None + try { + if (fastDateFormat.parse(s, new ParsePosition(0), cal)) { + Some(extractMicros(cal)) + } else { + None + } + } catch { + case NonFatal(_) => None } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala index 2134a0d6ecd36..27d60815766dc 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampFormatterSuite.scala @@ -502,10 +502,11 @@ class TimestampFormatterSuite extends DatetimeFormatterSuite { assert(fastFormatter.parseOptional("2023-12-31 23:59:59.9990").contains(1704067199999000L)) assert(fastFormatter.parseOptional("abc").isEmpty) + assert(fastFormatter.parseOptional("23012150952").isEmpty) assert(simpleFormatter.parseOptional("2023-12-31 23:59:59.9990").contains(1704067208990000L)) assert(simpleFormatter.parseOptional("abc").isEmpty) - + assert(simpleFormatter.parseOptional("23012150952").isEmpty) } test("SPARK-45424: do not return optional parse results when only prefix match") { From 8abf9583ac2303765255299af3e843d8248f313f Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 15 Dec 2023 18:55:10 +0800 Subject: [PATCH 149/521] [SPARK-46417][SQL] Do not fail when calling hive.getTable and throwException is false 
### What changes were proposed in this pull request? Uses can set up their own HMS and let Spark connects to it. We have no control over it and somtimes it's not even Hive but just a HMS-API-compatible service. Spark should be more fault-tolerant when calling HMS APIs. This PR fixes an issue in `hive.getTable` with `throwException = false`, to make sure we don't throw error when can't fetch the table. ### Why are the changes needed? avoid query failure caused by HMS bugs. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? in our product environment ### Was this patch authored or co-authored using generative AI tooling? No Closes #44364 from cloud-fan/hive. Lead-authored-by: Wenchen Fan Co-authored-by: Wenchen Fan Signed-off-by: Kent Yao (cherry picked from commit 59488039f58b18617cd6dfd6dbe3bf014af222e7) Signed-off-by: Kent Yao --- .../scala/org/apache/spark/sql/hive/client/HiveShim.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 60ff9ec42f29d..7025e09ae9d9e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -620,7 +620,13 @@ private[client] class Shim_v0_12 extends Shim with Logging { tableName: String, throwException: Boolean): Table = { recordHiveCall() - val table = hive.getTable(dbName, tableName, throwException) + val table = try { + hive.getTable(dbName, tableName, throwException) + } catch { + // Hive may have bugs and still throw an exception even if `throwException` is false. + case e: HiveException if !throwException => + null + } if (table != null) { table.getTTable.setTableName(tableName) table.getTTable.setDbName(dbName) From cc4f5787414e4392499a349dec5b24c8e25e50f3 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Tue, 19 Dec 2023 12:21:20 +0300 Subject: [PATCH 150/521] [SPARK-46453][CONNECT] Throw exception from `internalError()` in `SessionHolder` ### What changes were proposed in this pull request? In the PR, I propose to throw `SparkException` returned by `internalError` in `SessionHolder`. ### Why are the changes needed? Without the bug fix user won't see the internal error. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/a ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44400 from MaxGekk/throw-internal-error. 
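A minimal sketch of why the original code was a no-op; the `cache`/`register` names and the map type here are hypothetical and only illustrate the pattern fixed in `SessionHolder`:

```scala
import java.util.concurrent.ConcurrentHashMap

import org.apache.spark.SparkException

val cache = new ConcurrentHashMap[String, String]()

def register(id: String, value: String): Unit = {
  if (cache.putIfAbsent(id, value) != null) {
    // SparkException.internalError(...) only constructs the exception; without the explicit
    // `throw` added in this patch, the duplicate-id error was silently discarded.
    throw SparkException.internalError(s"A value is already associated with id $id")
  }
}
```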
Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit dc0bfc4c700c347f2f58625facec8c5771bde59a) Signed-off-by: Max Gekk --- .../org/apache/spark/sql/connect/service/SessionHolder.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala index 1cef02d7e3466..218819d114c12 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SessionHolder.scala @@ -197,7 +197,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio */ private[connect] def cacheDataFrameById(dfId: String, df: DataFrame): Unit = { if (dataFrameCache.putIfAbsent(dfId, df) != null) { - SparkException.internalError(s"A dataframe is already associated with id $dfId") + throw SparkException.internalError(s"A dataframe is already associated with id $dfId") } } @@ -221,7 +221,7 @@ case class SessionHolder(userId: String, sessionId: String, session: SparkSessio */ private[connect] def cacheListenerById(id: String, listener: StreamingQueryListener): Unit = { if (listenerCache.putIfAbsent(id, listener) != null) { - SparkException.internalError(s"A listener is already associated with id $id") + throw SparkException.internalError(s"A listener is already associated with id $id") } } From 0c00c54583fe3e56f940425aac6e0e4f05c4b9db Mon Sep 17 00:00:00 2001 From: zhouyifan279 Date: Wed, 20 Dec 2023 16:50:38 +0800 Subject: [PATCH 151/521] [SPARK-46330] Loading of Spark UI blocks for a long time when HybridStore enabled ### What changes were proposed in this pull request? Move `LoadedAppUI` invalidate operation out of `FsHistoryProvider` synchronized block. ### Why are the changes needed? When closing a HybridStore of a `LoadedAppUI` with a lot of data waiting to be written to disk, loading of other Spark UIs will be blocked for a long time. See more details at https://issues.apache.org/jira/browse/SPARK-46330 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Passed existing tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44260 from zhouyifan279/SPARK-46330. Authored-by: zhouyifan279 Signed-off-by: Kent Yao (cherry picked from commit cf54e8f9a51bf54e8fa3e1011ac370e46134b134) Signed-off-by: Kent Yao --- .../spark/deploy/history/FsHistoryProvider.scala | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index 49b479f3124e9..387bc7d9e45b3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -925,11 +925,12 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) * UI lifecycle. 
*/ private def invalidateUI(appId: String, attemptId: Option[String]): Unit = { - synchronized { - activeUIs.get((appId, attemptId)).foreach { ui => - ui.invalidate() - ui.ui.store.close() - } + val uiOption = synchronized { + activeUIs.get((appId, attemptId)) + } + uiOption.foreach { ui => + ui.invalidate() + ui.ui.store.close() } } From d7534a3ec1eab53bbd349f9ae31684337c734958 Mon Sep 17 00:00:00 2001 From: Aleksandar Tomic Date: Thu, 21 Dec 2023 15:58:15 +0800 Subject: [PATCH 152/521] [SPARK-46380][SQL] Replace current time/date prior to evaluating inline table expressions With this PR proposal is to do inline table resolution in two phases: 1) If there are no expressions that depend on current context (e.g. expressions that depend on CURRENT_DATABASE, CURRENT_USER, CURRENT_TIME etc.) they will be evaluated as part of ResolveInlineTable rule. 2) Expressions that do depend on CURRENT_* evaluation will be kept as expressions and they evaluation will be delayed to post analysis phase. This PR aims to solve two problems with inline tables. Example1: ```sql SELECT COUNT(DISTINCT ct) FROM VALUES (CURRENT_TIMESTAMP()), (CURRENT_TIMESTAMP()), (CURRENT_TIMESTAMP()) as data(ct) ``` Prior to this change this example would return 3 (i.e. all CURRENT_TIMESTAMP expressions would return different value since they would be evaluated individually as part of inline table evaluation). After this change result is 1. Example 2: ```sql CREATE VIEW V as (SELECT * FROM VALUES(CURRENT_TIMESTAMP()) ``` In this example VIEW would be saved with literal evaluated during VIEW creation. After this change CURRENT_TIMESTAMP() will eval during VIEW execution. See section above. New test that validates this behaviour is introduced. No. Closes #44316 from dbatomic/inline_tables_curr_time_fix. 
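A spark-shell sketch of Example 1 from the description, showing the intended post-fix behavior:

```scala
// After this change, every CURRENT_TIMESTAMP() in the inline table is replaced by the same
// query-level value after analysis, so all three rows are identical and the distinct count
// is 1; before the fix each row was evaluated independently and the query returned 3.
val df = spark.sql(
  """SELECT COUNT(DISTINCT ct)
    |FROM VALUES (CURRENT_TIMESTAMP()), (CURRENT_TIMESTAMP()), (CURRENT_TIMESTAMP()) AS data(ct)
    |""".stripMargin)
df.show()  // prints a single row containing 1
```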
Lead-authored-by: Aleksandar Tomic Co-authored-by: Aleksandar Tomic <150942779+dbatomic@users.noreply.github.com> Signed-off-by: Wenchen Fan (cherry picked from commit 5fe963f8560ef05925d127e82ab7ef28d6a1d7bc) Signed-off-by: Wenchen Fan --- .../analysis/ResolveInlineTables.scala | 68 ++++++++++--------- .../sql/catalyst/analysis/unresolved.scala | 15 ++++ .../sql/catalyst/optimizer/Optimizer.scala | 4 +- .../catalyst/optimizer/finishAnalysis.scala | 33 +++++++++ .../sql/catalyst/rules/RuleIdCollection.scala | 1 + .../sql/catalyst/trees/TreePatterns.scala | 1 + .../analysis/ResolveInlineTablesSuite.scala | 31 +++++++-- .../analyzer-results/inline-table.sql.out | 16 ++++- .../postgreSQL/create_view.sql.out | 2 +- .../sql-tests/inputs/inline-table.sql | 6 ++ .../sql-tests/results/inline-table.sql.out | 16 +++++ .../spark/sql/execution/SQLViewSuite.scala | 14 ++++ 12 files changed, 165 insertions(+), 42 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala index 760ea466b8579..73600f5c70649 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala @@ -17,28 +17,29 @@ package org.apache.spark.sql.catalyst.analysis -import scala.util.control.NonFatal - -import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{AliasHelper, EvalHelper} -import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan} +import org.apache.spark.sql.catalyst.expressions.{AliasHelper, EvalHelper, Expression} +import org.apache.spark.sql.catalyst.optimizer.EvalInlineTables +import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.AlwaysProcess +import org.apache.spark.sql.catalyst.trees.TreePattern.CURRENT_LIKE import org.apache.spark.sql.catalyst.types.DataTypeUtils import org.apache.spark.sql.catalyst.util.TypeUtils.{toSQLExpr, toSQLId} import org.apache.spark.sql.types.{StructField, StructType} /** - * An analyzer rule that replaces [[UnresolvedInlineTable]] with [[LocalRelation]]. + * An analyzer rule that replaces [[UnresolvedInlineTable]] with [[ResolvedInlineTable]]. */ object ResolveInlineTables extends Rule[LogicalPlan] with CastSupport with AliasHelper with EvalHelper { - override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsWithPruning( - AlwaysProcess.fn, ruleId) { - case table: UnresolvedInlineTable if table.expressionsResolved => - validateInputDimension(table) - validateInputEvaluable(table) - convert(table) + override def apply(plan: LogicalPlan): LogicalPlan = { + plan.resolveOperatorsWithPruning(AlwaysProcess.fn, ruleId) { + case table: UnresolvedInlineTable if table.expressionsResolved => + validateInputDimension(table) + validateInputEvaluable(table) + val resolvedTable = findCommonTypesAndCast(table) + earlyEvalIfPossible(resolvedTable) + } } /** @@ -74,7 +75,10 @@ object ResolveInlineTables extends Rule[LogicalPlan] table.rows.foreach { row => row.foreach { e => // Note that nondeterministic expressions are not supported since they are not foldable. - if (!e.resolved || !trimAliases(prepareForEval(e)).foldable) { + // Only exception are CURRENT_LIKE expressions, which are replaced by a literal + // In later stages. 
+ if ((!e.resolved && !e.containsPattern(CURRENT_LIKE)) + || !trimAliases(prepareForEval(e)).foldable) { e.failAnalysis( errorClass = "INVALID_INLINE_TABLE.CANNOT_EVALUATE_EXPRESSION_IN_INLINE_TABLE", messageParameters = Map("expr" -> toSQLExpr(e))) @@ -84,14 +88,12 @@ object ResolveInlineTables extends Rule[LogicalPlan] } /** - * Convert a valid (with right shape and foldable inputs) [[UnresolvedInlineTable]] - * into a [[LocalRelation]]. - * * This function attempts to coerce inputs into consistent types. * * This is package visible for unit testing. */ - private[analysis] def convert(table: UnresolvedInlineTable): LocalRelation = { + private[analysis] def findCommonTypesAndCast(table: UnresolvedInlineTable): + ResolvedInlineTable = { // For each column, traverse all the values and find a common data type and nullability. val fields = table.rows.transpose.zip(table.names).map { case (column, name) => val inputTypes = column.map(_.dataType) @@ -105,26 +107,30 @@ object ResolveInlineTables extends Rule[LogicalPlan] val attributes = DataTypeUtils.toAttributes(StructType(fields)) assert(fields.size == table.names.size) - val newRows: Seq[InternalRow] = table.rows.map { row => - InternalRow.fromSeq(row.zipWithIndex.map { case (e, ci) => - val targetType = fields(ci).dataType - try { + val castedRows: Seq[Seq[Expression]] = table.rows.map { row => + row.zipWithIndex.map { + case (e, ci) => + val targetType = fields(ci).dataType val castedExpr = if (DataTypeUtils.sameType(e.dataType, targetType)) { e } else { cast(e, targetType) } - prepareForEval(castedExpr).eval() - } catch { - case NonFatal(ex) => - table.failAnalysis( - errorClass = "INVALID_INLINE_TABLE.FAILED_SQL_EXPRESSION_EVALUATION", - messageParameters = Map("sqlExpr" -> toSQLExpr(e)), - cause = ex) - } - }) + castedExpr + } } - LocalRelation(attributes, newRows) + ResolvedInlineTable(castedRows, attributes) + } + + /** + * This function attempts to early evaluate rows in inline table. + * If evaluation doesn't rely on non-deterministic expressions (e.g. current_like) + * expressions will be evaluated and inlined as [[LocalRelation]] + * This is package visible for unit testing. + */ + private[analysis] def earlyEvalIfPossible(table: ResolvedInlineTable): LogicalPlan = { + val earlyEvalPossible = table.rows.flatten.forall(!_.containsPattern(CURRENT_LIKE)) + if (earlyEvalPossible) EvalInlineTables(table) else table } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala index b1dcb465b4778..07ad5e57306a5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala @@ -126,6 +126,21 @@ case class UnresolvedInlineTable( lazy val expressionsResolved: Boolean = rows.forall(_.forall(_.resolved)) } +/** + * An resolved inline table that holds all the expressions that were checked for + * the right shape and common data types. + * This is a preparation step for [[org.apache.spark.sql.catalyst.optimizer.EvalInlineTables]] which + * will produce a [[org.apache.spark.sql.catalyst.plans.logical.LocalRelation]] + * for this inline table. 
+ * + * @param output list of column attributes + * @param rows expressions for the data rows + */ +case class ResolvedInlineTable(rows: Seq[Seq[Expression]], output: Seq[Attribute]) + extends LeafNode { + final override val nodePatterns: Seq[TreePattern] = Seq(INLINE_TABLE_EVAL) +} + /** * A table-valued function, e.g. * {{{ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index bb2a86556c031..ec5f00d34cd8c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -287,7 +287,9 @@ abstract class Optimizer(catalogManager: CatalogManager) ComputeCurrentTime, ReplaceCurrentLike(catalogManager), SpecialDatetimeValues, - RewriteAsOfJoin) + RewriteAsOfJoin, + EvalInlineTables + ) override def apply(plan: LogicalPlan): LogicalPlan = { rules.foldLeft(plan) { case (sp, rule) => rule.apply(sp) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 466781fa1def7..d7efc16a514bd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -19,7 +19,12 @@ package org.apache.spark.sql.catalyst.optimizer import java.time.{Instant, LocalDateTime} +import scala.util.control.NonFatal + import org.apache.spark.sql.catalyst.CurrentUserContext.CURRENT_USER +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.{CastSupport, ResolvedInlineTable} +import org.apache.spark.sql.catalyst.analysis.ResolveInlineTables.prepareForEval import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ @@ -27,6 +32,7 @@ import org.apache.spark.sql.catalyst.trees.TreePattern._ import org.apache.spark.sql.catalyst.trees.TreePatternBits import org.apache.spark.sql.catalyst.util.DateTimeUtils import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, instantToMicros, localDateTimeToMicros} +import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLExpr import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -71,6 +77,33 @@ object RewriteNonCorrelatedExists extends Rule[LogicalPlan] { } } +/** + * Computes expressions in inline tables. This rule is supposed to be called at the very end + * of the analysis phase, given that all the expressions need to be fully resolved/replaced + * at this point. 
+ */ +object EvalInlineTables extends Rule[LogicalPlan] with CastSupport { + override def apply(plan: LogicalPlan): LogicalPlan = { + plan.transformDownWithSubqueriesAndPruning(_.containsPattern(INLINE_TABLE_EVAL)) { + case table: ResolvedInlineTable => + val newRows: Seq[InternalRow] = + table.rows.map { row => InternalRow.fromSeq(row.map { e => + try { + prepareForEval(e).eval() + } catch { + case NonFatal(ex) => + table.failAnalysis( + errorClass = "INVALID_INLINE_TABLE.FAILED_SQL_EXPRESSION_EVALUATION", + messageParameters = Map("sqlExpr" -> toSQLExpr(e)), + cause = ex) + }}) + } + + LocalRelation(table.output, newRows) + } + } +} + /** * Computes the current date and time to make sure we return the same result in a single query. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala index caf679f3e7a7a..96f78d251c39a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleIdCollection.scala @@ -166,6 +166,7 @@ object RuleIdCollection { "org.apache.spark.sql.catalyst.optimizer.SimplifyConditionals" :: "org.apache.spark.sql.catalyst.optimizer.SimplifyExtractValueOps" :: "org.apache.spark.sql.catalyst.optimizer.TransposeWindow" :: + "org.apache.spark.sql.catalyst.optimizer.EvalInlineTables" :: "org.apache.spark.sql.catalyst.optimizer.UnwrapCastInBinaryComparison" :: Nil } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala index bf7b2db1719f5..ce8f5951839e1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreePatterns.scala @@ -53,6 +53,7 @@ object TreePattern extends Enumeration { val IF: Value = Value val IN: Value = Value val IN_SUBQUERY: Value = Value + val INLINE_TABLE_EVAL: Value = Value val INSET: Value = Value val INTERSECT: Value = Value val INVOKE: Value = Value diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala index 2e6c6e4eaf4c3..758b6b73e4eb1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTablesSuite.scala @@ -20,8 +20,9 @@ package org.apache.spark.sql.catalyst.analysis import org.scalatest.BeforeAndAfter import org.apache.spark.sql.AnalysisException -import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, Literal, Rand} +import org.apache.spark.sql.catalyst.expressions.{Alias, Cast, CurrentTimestamp, Literal, Rand} import org.apache.spark.sql.catalyst.expressions.aggregate.Count +import org.apache.spark.sql.catalyst.optimizer.{ComputeCurrentTime, EvalInlineTables} import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.types.{LongType, NullType, TimestampType} @@ -83,9 +84,10 @@ class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter { assert(ResolveInlineTables(table) == table) } - test("convert") { + test("cast and execute") { val table = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L)))) - val 
converted = ResolveInlineTables.convert(table) + val resolved = ResolveInlineTables.findCommonTypesAndCast(table) + val converted = ResolveInlineTables.earlyEvalIfPossible(resolved).asInstanceOf[LocalRelation] assert(converted.output.map(_.dataType) == Seq(LongType)) assert(converted.data.size == 2) @@ -93,11 +95,28 @@ class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter { assert(converted.data(1).getLong(0) == 2L) } + test("cast and execute CURRENT_LIKE expressions") { + val table = UnresolvedInlineTable(Seq("c1"), Seq( + Seq(CurrentTimestamp()), Seq(CurrentTimestamp()))) + val casted = ResolveInlineTables.findCommonTypesAndCast(table) + val earlyEval = ResolveInlineTables.earlyEvalIfPossible(casted) + // Early eval should keep it in expression form. + assert(earlyEval.isInstanceOf[ResolvedInlineTable]) + + EvalInlineTables(ComputeCurrentTime(earlyEval)) match { + case LocalRelation(output, data, _) => + assert(output.map(_.dataType) == Seq(TimestampType)) + assert(data.size == 2) + // Make sure that both CURRENT_TIMESTAMP expressions are evaluated to the same value. + assert(data(0).getLong(0) == data(1).getLong(0)) + } + } + test("convert TimeZoneAwareExpression") { val table = UnresolvedInlineTable(Seq("c1"), Seq(Seq(Cast(lit("1991-12-06 00:00:00.0"), TimestampType)))) val withTimeZone = ResolveTimeZone.apply(table) - val LocalRelation(output, data, _) = ResolveInlineTables.apply(withTimeZone) + val LocalRelation(output, data, _) = EvalInlineTables(ResolveInlineTables.apply(withTimeZone)) val correct = Cast(lit("1991-12-06 00:00:00.0"), TimestampType) .withTimeZone(conf.sessionLocalTimeZone).eval().asInstanceOf[Long] assert(output.map(_.dataType) == Seq(TimestampType)) @@ -107,11 +126,11 @@ class ResolveInlineTablesSuite extends AnalysisTest with BeforeAndAfter { test("nullability inference in convert") { val table1 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(lit(2L)))) - val converted1 = ResolveInlineTables.convert(table1) + val converted1 = ResolveInlineTables.findCommonTypesAndCast(table1) assert(!converted1.schema.fields(0).nullable) val table2 = UnresolvedInlineTable(Seq("c1"), Seq(Seq(lit(1)), Seq(Literal(null, NullType)))) - val converted2 = ResolveInlineTables.convert(table2) + val converted2 = ResolveInlineTables.findCommonTypesAndCast(table2) assert(converted2.schema.fields(0).nullable) } } diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/inline-table.sql.out index 2a17f092a06b7..adce16bf23578 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/inline-table.sql.out @@ -73,9 +73,7 @@ Project [a#x, b#x] -- !query select a from values ("one", current_timestamp) as data(a, b) -- !query analysis -Project [a#x] -+- SubqueryAlias data - +- LocalRelation [a#x, b#x] +[Analyzer test output redacted due to nondeterminism] -- !query @@ -241,3 +239,15 @@ select * from values (10 + try_divide(5, 0)) -- !query analysis Project [col1#x] +- LocalRelation [col1#x] + + +-- !query +select count(distinct ct) from values now(), now(), now() as data(ct) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] + + +-- !query +select count(distinct ct) from values current_timestamp(), current_timestamp() as data(ct) +-- !query analysis +[Analyzer test output redacted due to nondeterminism] diff --git 
a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out index b199cb55f2a44..7f477c80d46ca 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/postgreSQL/create_view.sql.out @@ -1661,7 +1661,7 @@ select * from tt7a left join tt8a using (x), tt8a tt8ax, false, false, Persisted :- Project [a#x, b#x, c#x, d#x, e#x] : +- SubqueryAlias v : +- Project [col1#x AS a#x, col2#x AS b#x, col3#x AS c#x, col4#x AS d#x, col5#x AS e#x] - : +- LocalRelation [col1#x, col2#x, col3#x, col4#x, col5#x] + : +- ResolvedInlineTable [[now(), 2, 3, now(), 5]], [col1#x, col2#x, col3#x, col4#x, col5#x] +- Project [cast(x#x as timestamp) AS x#x, y#x, z#x, x#x, z#x] +- Project [x#x, y#x, z#x, x#x, z#x] +- Join Inner diff --git a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql index 6867248f5765d..8f65dc77c960a 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/inline-table.sql @@ -60,3 +60,9 @@ select * from values (timestamp('1991-12-06 00:00:00.0'), array(timestamp('1991- select * from values (try_add(5, 0)); select * from values (try_divide(5, 0)); select * from values (10 + try_divide(5, 0)); + +-- now() should be kept as tempResolved inline expression. +select count(distinct ct) from values now(), now(), now() as data(ct); + +-- current_timestamp() should be kept as tempResolved inline expression. +select count(distinct ct) from values current_timestamp(), current_timestamp() as data(ct); diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out index 709d7ab73f6c4..b6c90b95c1d34 100644 --- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out @@ -266,3 +266,19 @@ select * from values (10 + try_divide(5, 0)) struct -- !query output NULL + + +-- !query +select count(distinct ct) from values now(), now(), now() as data(ct) +-- !query schema +struct +-- !query output +1 + + +-- !query +select count(distinct ct) from values current_timestamp(), current_timestamp() as data(ct) +-- !query schema +struct +-- !query output +1 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala index e258d600a2aa8..a1147c16cc861 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SQLViewSuite.scala @@ -1216,4 +1216,18 @@ abstract class SQLViewSuite extends QueryTest with SQLTestUtils { } } } + + test("Inline table with current time expression") { + withView("v1") { + sql("CREATE VIEW v1 (t1, t2) AS SELECT * FROM VALUES (now(), now())") + val r1 = sql("select t1, t2 from v1").collect()(0) + val ts1 = (r1.getTimestamp(0), r1.getTimestamp(1)) + assert(ts1._1 == ts1._2) + Thread.sleep(1) + val r2 = sql("select t1, t2 from v1").collect()(0) + val ts2 = (r2.getTimestamp(0), r2.getTimestamp(1)) + assert(ts2._1 == ts2._2) + assert(ts1._1.getTime < ts2._1.getTime) + } + } } From 286c469ad1305f91ea796fd453ae896617fb3883 Mon Sep 17 00:00:00 2001 From: Jiaan Geng Date: Fri, 22 Dec 2023 09:55:00 +0800 
Subject: [PATCH 153/521] [SPARK-46443][SQL] Decimal precision and scale should decided by H2 dialect ### What changes were proposed in this pull request? This PR fix a but by make JDBC dialect decide the decimal precision and scale. **How to reproduce the bug?** https://github.com/apache/spark/pull/44397 proposed DS V2 push down `PERCENTILE_CONT` and `PERCENTILE_DISC`. The bug fired when pushdown the below SQL to H2 JDBC. `SELECT "DEPT",PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY "SALARY" ASC NULLS FIRST) FROM "test"."employee" WHERE 1=0 GROUP BY "DEPT"` **The root cause** `getQueryOutputSchema` used to get the output schema of query by call `JdbcUtils.getSchema`. The query for database H2 show below. `SELECT "DEPT",PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY "SALARY" ASC NULLS FIRST) FROM "test"."employee" WHERE 1=0 GROUP BY "DEPT"` We can get the five variables from `ResultSetMetaData`, please refer: ``` columnName = "PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY SALARY NULLS FIRST)" dataType = 2 typeName = "NUMERIC" fieldSize = 100000 fieldScale = 50000 ``` Then we get the catalyst schema with `JdbcUtils.getCatalystType`, it calls `DecimalType.bounded(precision, scale)` actually. The `DecimalType.bounded(100000, 50000)` returns `DecimalType(38, 38)`. At finally, `makeGetter` throws exception. ``` Caused by: org.apache.spark.SparkArithmeticException: [DECIMAL_PRECISION_EXCEEDS_MAX_PRECISION] Decimal precision 42 exceeds max precision 38. SQLSTATE: 22003 at org.apache.spark.sql.errors.DataTypeErrors$.decimalPrecisionExceedsMaxPrecisionError(DataTypeErrors.scala:48) at org.apache.spark.sql.types.Decimal.set(Decimal.scala:124) at org.apache.spark.sql.types.Decimal$.apply(Decimal.scala:577) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$makeGetter$4(JdbcUtils.scala:408) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.nullSafeConvert(JdbcUtils.scala:552) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$makeGetter$3(JdbcUtils.scala:408) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$.$anonfun$makeGetter$3$adapted(JdbcUtils.scala:406) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anon$1.getNext(JdbcUtils.scala:358) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anon$1.getNext(JdbcUtils.scala:339) ``` ### Why are the changes needed? This PR fix the bug that `JdbcUtils` can't get the correct decimal type. ### Does this PR introduce _any_ user-facing change? 'Yes'. Fix a bug. ### How was this patch tested? Manual tests in https://github.com/apache/spark/pull/44397 ### Was this patch authored or co-authored using generative AI tooling? 'No'. Closes #44398 from beliefer/SPARK-46443. 
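A worked example of the ratio-preserving mapping introduced by the fix (the diff follows below), using the H2 metadata quoted above:

```scala
// H2 reports NUMERIC with fieldSize = 100000 and fieldScale = 50000 for the PERCENTILE_CONT
// result; keeping the scale-to-precision ratio while clamping to Spark's maximum precision
// of 38 yields a usable scale.
val maxPrecision = 38                     // DecimalType.MAX_PRECISION
val size = 100000
val scale = 50000L
val selectedScale = (maxPrecision * (scale.toDouble / size.toDouble)).toInt
// selectedScale == 19, so the column maps to DecimalType(38, 19) rather than the
// DecimalType(38, 38) previously produced by DecimalType.bounded(100000, 50000).
```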
Authored-by: Jiaan Geng Signed-off-by: Wenchen Fan (cherry picked from commit a921da8509a19b2d23c30ad657725f760932236c) Signed-off-by: Wenchen Fan --- .../org/apache/spark/sql/jdbc/H2Dialect.scala | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala index 8471a49153ff4..3f56eb035f5c3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -35,7 +35,7 @@ import org.apache.spark.sql.connector.catalog.functions.UnboundFunction import org.apache.spark.sql.connector.catalog.index.TableIndex import org.apache.spark.sql.connector.expressions.{Expression, FieldReference, NamedReference} import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils} -import org.apache.spark.sql.types.{BooleanType, ByteType, DataType, DecimalType, ShortType, StringType} +import org.apache.spark.sql.types.{BooleanType, ByteType, DataType, DecimalType, MetadataBuilder, ShortType, StringType} private[sql] object H2Dialect extends JdbcDialect { override def canHandle(url: String): Boolean = @@ -57,6 +57,20 @@ private[sql] object H2Dialect extends JdbcDialect { override def isSupportedFunction(funcName: String): Boolean = supportedFunctions.contains(funcName) + override def getCatalystType( + sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = { + sqlType match { + case Types.NUMERIC if size > 38 => + // H2 supports very large decimal precision like 100000. The max precision in Spark is only + // 38. Here we shrink both the precision and scale of H2 decimal to fit Spark, and still + // keep the ratio between them. + val scale = if (null != md) md.build().getLong("scale") else 0L + val selectedScale = (DecimalType.MAX_PRECISION * (scale.toDouble / size.toDouble)).toInt + Option(DecimalType(DecimalType.MAX_PRECISION, selectedScale)) + case _ => None + } + } + override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { case StringType => Option(JdbcType("CLOB", Types.CLOB)) case BooleanType => Some(JdbcType("BOOLEAN", Types.BOOLEAN)) From 98042e34796ec8d83071256142f8e121f50ad1f4 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Fri, 22 Dec 2023 11:45:10 +0800 Subject: [PATCH 154/521] [SPARK-46464][DOC] Fix the scroll issue of tables when overflow ### What changes were proposed in this pull request? https://spark.apache.org/docs/3.4.1/running-on-kubernetes.html#spark-properties https://spark.apache.org/docs/latest/running-on-kubernetes.html#spark-properties As listed above, the doc content in 3.5.0 cannot scroll horizontally. Users can only see the rest of its content when a table overflows if they zoom out as much as possible, resulting in hard-to-read minor characters. This PR changes the HTML body overflow-x from hidden to auto to enable the underlying table to scroll horizontally. ### Why are the changes needed? Fix documentation ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? #### Before ![image](https://github.com/apache/spark/assets/8326978/437bee91-ab0d-4616-aaaf-f99171dcf9f9) #### After ![image](https://github.com/apache/spark/assets/8326978/327ed82b-3e14-4a27-be1a-835a7b21c000) ### Was this patch authored or co-authored using generative AI tooling? no Closes #44423 from yaooqinn/SPARK-46464. 
Authored-by: Kent Yao Signed-off-by: Kent Yao (cherry picked from commit fc7d7bce7732a2bccb3a7ccf3ed6bed4ac65f8fc) Signed-off-by: Kent Yao --- docs/css/custom.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/css/custom.css b/docs/css/custom.css index e7416d9ded618..1239c0ed440ef 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -7,7 +7,7 @@ body { font-style: normal; font-weight: 400; overflow-wrap: anywhere; - overflow-x: hidden; + overflow-x: auto; padding-top: 80px; } From a001482b43d24b4761049687b87bceba0e21c8fd Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Fri, 22 Dec 2023 17:28:59 +0800 Subject: [PATCH 155/521] [SPARK-46480][CORE][SQL][3.5] Fix NPE when table cache task attempt This pr backports https://github.com/apache/spark/pull/44445 for branch-3.5 ### What changes were proposed in this pull request? This pr adds a check: we only mark the cached partition is materialized if the task is not failed and not interrupted. And adds a new method `isFailed` in `TaskContext`. ### Why are the changes needed? Before this pr, when do cache, task failure can cause NPE in other tasks ``` java.lang.NullPointerException at java.nio.ByteBuffer.wrap(ByteBuffer.java:396) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificColumnarIterator.accessors1$(Unknown Source) at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificColumnarIterator.hasNext(Unknown Source) at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458) at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:155) at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52) at org.apache.spark.scheduler.Task.run(Task.scala:131) at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:497) ``` ### Does this PR introduce _any_ user-facing change? yes, it's a bug fix ### How was this patch tested? add test ### Was this patch authored or co-authored using generative AI tooling? no Closes #44457 from ulysses-you/fix-cache-3.5. 
Authored-by: ulysses-you Signed-off-by: youxiduo --- .../scala/org/apache/spark/BarrierTaskContext.scala | 2 ++ core/src/main/scala/org/apache/spark/TaskContext.scala | 5 +++++ .../main/scala/org/apache/spark/TaskContextImpl.scala | 2 ++ .../org/apache/spark/scheduler/TaskContextSuite.scala | 10 ++++++++++ project/MimaExcludes.scala | 4 +++- .../sql/execution/columnar/InMemoryRelation.scala | 8 +++++--- 6 files changed, 27 insertions(+), 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala index ecc0c891ea161..94ba3fe64a859 100644 --- a/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala +++ b/core/src/main/scala/org/apache/spark/BarrierTaskContext.scala @@ -193,6 +193,8 @@ class BarrierTaskContext private[spark] ( override def isCompleted(): Boolean = taskContext.isCompleted() + override def isFailed(): Boolean = taskContext.isFailed() + override def isInterrupted(): Boolean = taskContext.isInterrupted() override def addTaskCompletionListener(listener: TaskCompletionListener): this.type = { diff --git a/core/src/main/scala/org/apache/spark/TaskContext.scala b/core/src/main/scala/org/apache/spark/TaskContext.scala index 450c00928c9e6..af7aa4979dc1c 100644 --- a/core/src/main/scala/org/apache/spark/TaskContext.scala +++ b/core/src/main/scala/org/apache/spark/TaskContext.scala @@ -94,6 +94,11 @@ abstract class TaskContext extends Serializable { */ def isCompleted(): Boolean + /** + * Returns true if the task has failed. + */ + def isFailed(): Boolean + /** * Returns true if the task has been killed. */ diff --git a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala index 526627c28607d..46273a1b6d687 100644 --- a/core/src/main/scala/org/apache/spark/TaskContextImpl.scala +++ b/core/src/main/scala/org/apache/spark/TaskContextImpl.scala @@ -275,6 +275,8 @@ private[spark] class TaskContextImpl( @GuardedBy("this") override def isCompleted(): Boolean = synchronized(completed) + override def isFailed(): Boolean = synchronized(failureCauseOpt.isDefined) + override def isInterrupted(): Boolean = reasonIfKilled.isDefined override def getLocalProperty(key: String): String = localProperties.getProperty(key) diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala index 54a42c1a66184..a5c2cbf52aafd 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskContextSuite.scala @@ -669,6 +669,16 @@ class TaskContextSuite extends SparkFunSuite with BeforeAndAfter with LocalSpark assert(invocationOrder === Seq("C", "B", "A", "D")) } + test("SPARK-46480: Add isFailed in TaskContext") { + val context = TaskContext.empty() + var isFailed = false + context.addTaskCompletionListener[Unit] { context => + isFailed = context.isFailed() + } + context.markTaskFailed(new RuntimeException()) + context.markTaskCompleted(None) + assert(isFailed) + } } private object TaskContextSuite { diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 9805ad7f09d6e..376ddfde1b937 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -72,7 +72,9 @@ object MimaExcludes { ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.api.java.function.FlatMapGroupsWithStateFunction"), 
ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.api.java.function.MapGroupsWithStateFunction"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.SaveMode"), - ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.streaming.GroupState") + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.streaming.GroupState"), + // [SPARK-46480][CORE][SQL] Fix NPE when table cache task attempt + ProblemFilters.exclude[ReversedMissingMethodProblem]("org.apache.spark.TaskContext.isFailed") ) // Default exclude rules diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index 45d006b58e879..65f7835b42cf8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -279,9 +279,11 @@ case class CachedRDDBuilder( cachedPlan.conf) } val cached = cb.mapPartitionsInternal { it => - TaskContext.get().addTaskCompletionListener[Unit](_ => { - materializedPartitions.add(1L) - }) + TaskContext.get().addTaskCompletionListener[Unit] { context => + if (!context.isFailed() && !context.isInterrupted()) { + materializedPartitions.add(1L) + } + } new Iterator[CachedBatch] { override def hasNext: Boolean = it.hasNext override def next(): CachedBatch = { From 0948e24c30f6f7a05110f6e45b6723897e095aeb Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 22 Dec 2023 23:25:12 +0800 Subject: [PATCH 156/521] [SPARK-46466][SQL][3.5] Vectorized parquet reader should never do rebase for timestamp ntz backport https://github.com/apache/spark/pull/44428 ### What changes were proposed in this pull request? This fixes a correctness bug. The TIMESTAMP_NTZ is a new data type in Spark and has no legacy files that need to do calendar rebase. However, the vectorized parquet reader treat it the same as LTZ and may do rebase if the parquet file was written with the legacy rebase mode. This PR fixes it to never do rebase for NTZ. ### Why are the changes needed? bug fix ### Does this PR introduce _any_ user-facing change? Yes, now we can correctly write and read back NTZ value even if the date is before 1582. ### How was this patch tested? new test ### Was this patch authored or co-authored using generative AI tooling? No Closes #44446 from cloud-fan/ntz2. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan --- .../parquet/ParquetVectorUpdaterFactory.java | 31 ++++++++++--------- .../parquet/ParquetQuerySuite.scala | 12 +++++++ 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java index 42442cf8ea8a4..8c4fe20853879 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/ParquetVectorUpdaterFactory.java @@ -109,24 +109,32 @@ public ParquetVectorUpdater getUpdater(ColumnDescriptor descriptor, DataType spa // For unsigned int64, it stores as plain signed int64 in Parquet when dictionary // fallbacks. We read them as decimal values. 
return new UnsignedLongUpdater(); - } else if (isTimestamp(sparkType) && - isTimestampTypeMatched(LogicalTypeAnnotation.TimeUnit.MICROS)) { - validateTimestampType(sparkType); + } else if (sparkType == DataTypes.TimestampType && + isTimestampTypeMatched(LogicalTypeAnnotation.TimeUnit.MICROS)) { if ("CORRECTED".equals(datetimeRebaseMode)) { return new LongUpdater(); } else { boolean failIfRebase = "EXCEPTION".equals(datetimeRebaseMode); return new LongWithRebaseUpdater(failIfRebase, datetimeRebaseTz); } - } else if (isTimestamp(sparkType) && - isTimestampTypeMatched(LogicalTypeAnnotation.TimeUnit.MILLIS)) { - validateTimestampType(sparkType); + } else if (sparkType == DataTypes.TimestampType && + isTimestampTypeMatched(LogicalTypeAnnotation.TimeUnit.MILLIS)) { if ("CORRECTED".equals(datetimeRebaseMode)) { return new LongAsMicrosUpdater(); } else { final boolean failIfRebase = "EXCEPTION".equals(datetimeRebaseMode); return new LongAsMicrosRebaseUpdater(failIfRebase, datetimeRebaseTz); } + } else if (sparkType == DataTypes.TimestampNTZType && + isTimestampTypeMatched(LogicalTypeAnnotation.TimeUnit.MICROS)) { + validateTimestampNTZType(); + // TIMESTAMP_NTZ is a new data type and has no legacy files that need to do rebase. + return new LongUpdater(); + } else if (sparkType == DataTypes.TimestampNTZType && + isTimestampTypeMatched(LogicalTypeAnnotation.TimeUnit.MILLIS)) { + validateTimestampNTZType(); + // TIMESTAMP_NTZ is a new data type and has no legacy files that need to do rebase. + return new LongAsMicrosUpdater(); } else if (sparkType instanceof DayTimeIntervalType) { return new LongUpdater(); } @@ -196,12 +204,11 @@ boolean isTimestampTypeMatched(LogicalTypeAnnotation.TimeUnit unit) { ((TimestampLogicalTypeAnnotation) logicalTypeAnnotation).getUnit() == unit; } - void validateTimestampType(DataType sparkType) { + private void validateTimestampNTZType() { assert(logicalTypeAnnotation instanceof TimestampLogicalTypeAnnotation); - // Throw an exception if the Parquet type is TimestampLTZ and the Catalyst type is TimestampNTZ. + // Throw an exception if the Parquet type is TimestampLTZ as the Catalyst type is TimestampNTZ. // This is to avoid mistakes in reading the timestamp values. 
- if (((TimestampLogicalTypeAnnotation) logicalTypeAnnotation).isAdjustedToUTC() && - sparkType == DataTypes.TimestampNTZType) { + if (((TimestampLogicalTypeAnnotation) logicalTypeAnnotation).isAdjustedToUTC()) { convertErrorForTimestampNTZ("int64 time(" + logicalTypeAnnotation + ")"); } } @@ -1152,10 +1159,6 @@ private static boolean isLongDecimal(DataType dt) { return false; } - private static boolean isTimestamp(DataType dt) { - return dt == DataTypes.TimestampType || dt == DataTypes.TimestampNTZType; - } - private static boolean isDecimalTypeMatched(ColumnDescriptor descriptor, DataType dt) { DecimalType d = (DecimalType) dt; LogicalTypeAnnotation typeAnnotation = descriptor.getPrimitiveType().getLogicalTypeAnnotation(); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index ea5444a1791fd..828ec39c7d727 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -255,6 +255,18 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS } } + test("SPARK-46466: write and read TimestampNTZ with legacy rebase mode") { + withSQLConf(SQLConf.PARQUET_REBASE_MODE_IN_WRITE.key -> "LEGACY") { + withTable("ts") { + sql("create table ts (c1 timestamp_ntz) using parquet") + sql("insert into ts values (timestamp_ntz'0900-01-01 01:10:10')") + withAllParquetReaders { + checkAnswer(spark.table("ts"), sql("select timestamp_ntz'0900-01-01 01:10:10'")) + } + } + } + } + test("Enabling/disabling merging partfiles when merging parquet schema") { def testSchemaMerging(expectedColumnNumber: Int): Unit = { withTempDir { dir => From 432ab15013b7109d020fe66dee1c4287d9bc7cc3 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 27 Dec 2023 11:46:43 -0800 Subject: [PATCH 157/521] [SPARK-46478][SQL][3.5] Revert SPARK-43049 to use oracle varchar(255) for string ### What changes were proposed in this pull request? Revert SPARK-43049 to use Oracle Varchar (255) for string for performance consideration ### Why are the changes needed? for performance consideration ### Does this PR introduce _any_ user-facing change? yes, storing strings in Oracle table, which is defined by spark DDL with string columns. Users will get an error if string values exceed 255 ```java org.apache.spark.SparkRuntimeException: [EXCEED_LIMIT_LENGTH] Exceeds char/varchar type length limitation: 255. SQLSTATE: 54006 [info] at org.apache.spark.sql.errors.QueryExecutionErrors$.exceedMaxLimit(QueryExecutionErrors.scala:2512) ``` ### How was this patch tested? revised unit tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #44493 from yaooqinn/SPARK-46478-B. 
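To illustrate the user-facing consequence, and one possible escape hatch, here is a hedged sketch. The connection URL and table name are placeholders, and the workaround assumes the generic `createTableColumnTypes` option of the JDBC writer rather than anything Oracle-specific:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[*]").getOrCreate()
import spark.implicits._

val oracleUrl = "jdbc:oracle:thin:@//host:1521/xe"  // placeholder URL
val df = Seq("x" * 256).toDF("c1")                  // 256 characters no longer fit VARCHAR2(255)

// With this change the default mapping is VARCHAR2(255), so a plain write of the value above
// is expected to be rejected (by Spark's char/varchar length check or by Oracle itself):
// df.write.format("jdbc").option("url", oracleUrl).option("dbtable", "t1").save()

// One way to keep longer strings is to widen the column explicitly at table creation time:
df.write
  .format("jdbc")
  .option("url", oracleUrl)
  .option("dbtable", "t1")
  .option("createTableColumnTypes", "c1 VARCHAR(1024)")
  .save()
```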
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun --- .../sql/jdbc/OracleIntegrationSuite.scala | 3 +-- .../sql/jdbc/v2/OracleIntegrationSuite.scala | 23 ++++++++++++++----- .../apache/spark/sql/jdbc/v2/V2JDBCTest.scala | 2 +- .../sql/catalyst/util/CharVarcharUtils.scala | 3 ++- .../apache/spark/sql/jdbc/OracleDialect.scala | 2 +- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 4 ++-- 6 files changed, 24 insertions(+), 13 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 483f6087c81d2..70afad781ca25 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -173,8 +173,7 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark } - // SPARK-43049: Use CLOB instead of VARCHAR(255) for StringType for Oracle jdbc-am"" - test("SPARK-12941: String datatypes to be mapped to CLOB in Oracle") { + test("SPARK-12941: String datatypes to be mapped to VARCHAR(255) in Oracle") { // create a sample dataframe with string type val df1 = sparkContext.parallelize(Seq(("foo"))).toDF("x") // write the dataframe to the oracle table tbl diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala index 5124199328ce2..6b5dd043a617f 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/OracleIntegrationSuite.scala @@ -22,8 +22,9 @@ import java.util.Locale import org.scalatest.time.SpanSugar._ -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkRuntimeException} import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.util.CharVarcharUtils.CHAR_VARCHAR_TYPE_STRING_METADATA_KEY import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.DatabaseOnDocker import org.apache.spark.sql.types._ @@ -86,6 +87,11 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTes s"jdbc:oracle:thin:system/$oracle_password@//$ip:$port/xe" } + override val defaultMetadata: Metadata = new MetadataBuilder() + .putLong("scale", 0) + .putString(CHAR_VARCHAR_TYPE_STRING_METADATA_KEY, "varchar(255)") + .build() + override def sparkConf: SparkConf = super.sparkConf .set("spark.sql.catalog.oracle", classOf[JDBCTableCatalog].getName) .set("spark.sql.catalog.oracle.url", db.getJdbcUrl(dockerIp, externalPort)) @@ -104,11 +110,11 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTes override def testUpdateColumnType(tbl: String): Unit = { sql(s"CREATE TABLE $tbl (ID INTEGER)") var t = spark.table(tbl) - var expectedSchema = new StructType().add("ID", DecimalType(10, 0), true, defaultMetadata) + var expectedSchema = new StructType().add("ID", DecimalType(10, 0), true, super.defaultMetadata) assert(t.schema === expectedSchema) sql(s"ALTER TABLE $tbl ALTER COLUMN id TYPE LONG") t = spark.table(tbl) - expectedSchema = new StructType().add("ID", DecimalType(19, 0), true, defaultMetadata) + 
expectedSchema = new StructType().add("ID", DecimalType(19, 0), true, super.defaultMetadata) assert(t.schema === expectedSchema) // Update column type from LONG to INTEGER val sql1 = s"ALTER TABLE $tbl ALTER COLUMN id TYPE INTEGER" @@ -129,12 +135,17 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTes override def caseConvert(tableName: String): String = tableName.toUpperCase(Locale.ROOT) - test("SPARK-43049: Use CLOB instead of VARCHAR(255) for StringType for Oracle JDBC") { + test("SPARK-46478: Revert SPARK-43049 to use varchar(255) for string") { val tableName = catalogName + ".t1" withTable(tableName) { sql(s"CREATE TABLE $tableName(c1 string)") - sql(s"INSERT INTO $tableName SELECT rpad('hi', 256, 'spark')") - assert(sql(s"SELECT char_length(c1) from $tableName").head().get(0) === 256) + checkError( + exception = intercept[SparkRuntimeException] { + sql(s"INSERT INTO $tableName SELECT rpad('hi', 256, 'spark')") + }, + errorClass = "EXCEED_LIMIT_LENGTH", + parameters = Map("limit" -> "255") + ) } } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala index b5f5b0e5f20bd..99f435611f2c4 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/V2JDBCTest.scala @@ -49,7 +49,7 @@ private[v2] trait V2JDBCTest extends SharedSparkSession with DockerIntegrationFu def notSupportsTableComment: Boolean = false - val defaultMetadata = new MetadataBuilder().putLong("scale", 0).build() + def defaultMetadata: Metadata = new MetadataBuilder().putLong("scale", 0).build() def testUpdateColumnNullability(tbl: String): Unit = { sql(s"CREATE TABLE $catalogName.alt_table (ID STRING NOT NULL)") diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala index b9d83d444909d..f3c272785a7be 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharVarcharUtils.scala @@ -28,7 +28,8 @@ import org.apache.spark.sql.types._ object CharVarcharUtils extends Logging with SparkCharVarcharUtils { - private val CHAR_VARCHAR_TYPE_STRING_METADATA_KEY = "__CHAR_VARCHAR_TYPE_STRING" + // visible for testing + private[sql] val CHAR_VARCHAR_TYPE_STRING_METADATA_KEY = "__CHAR_VARCHAR_TYPE_STRING" /** * Replaces CharType/VarcharType with StringType recursively in the given struct type. 
If a diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 3a0333cca33fd..95774d38e50ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -118,7 +118,7 @@ private case object OracleDialect extends JdbcDialect { case DoubleType => Some(JdbcType("NUMBER(19, 4)", java.sql.Types.DOUBLE)) case ByteType => Some(JdbcType("NUMBER(3)", java.sql.Types.SMALLINT)) case ShortType => Some(JdbcType("NUMBER(5)", java.sql.Types.SMALLINT)) - case StringType => Some(JdbcType("CLOB", java.sql.Types.CLOB)) + case StringType => Some(JdbcType("VARCHAR2(255)", java.sql.Types.VARCHAR)) case _ => None } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index 71c7245b06090..c4145f4cbf73b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -1274,7 +1274,7 @@ class JDBCSuite extends QueryTest with SharedSparkSession { test("SPARK 12941: The data type mapping for StringType to Oracle") { val oracleDialect = JdbcDialects.get("jdbc:oracle://127.0.0.1/db") assert(oracleDialect.getJDBCType(StringType). - map(_.databaseTypeDefinition).get == "CLOB") + map(_.databaseTypeDefinition).get == "VARCHAR2(255)") } test("SPARK-16625: General data types to be mapped to Oracle") { @@ -1292,7 +1292,7 @@ class JDBCSuite extends QueryTest with SharedSparkSession { assert(getJdbcType(oracleDialect, DoubleType) == "NUMBER(19, 4)") assert(getJdbcType(oracleDialect, ByteType) == "NUMBER(3)") assert(getJdbcType(oracleDialect, ShortType) == "NUMBER(5)") - assert(getJdbcType(oracleDialect, StringType) == "CLOB") + assert(getJdbcType(oracleDialect, StringType) == "VARCHAR2(255)") assert(getJdbcType(oracleDialect, BinaryType) == "BLOB") assert(getJdbcType(oracleDialect, DateType) == "DATE") assert(getJdbcType(oracleDialect, TimestampType) == "TIMESTAMP") From 5d4a913e3f3ba906c029e5e8a08194eac8ef250e Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 28 Dec 2023 10:53:02 +0800 Subject: [PATCH 158/521] [SPARK-46514][TESTS] Fix HiveMetastoreLazyInitializationSuite ### What changes were proposed in this pull request? This PR enabled the assertion in HiveMetastoreLazyInitializationSuite ### Why are the changes needed? fix test intenton ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? pass HiveMetastoreLazyInitializationSuite ### Was this patch authored or co-authored using generative AI tooling? no Closes #44500 from yaooqinn/SPARK-46514. 
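The underlying mistake is easy to miss in review: a bare boolean expression inside a loop compiles fine but its result is discarded, so the original test could never fail on the messages it was meant to check. A standalone illustration (the strings are made up):

```scala
val exceptionString = "java.lang.RuntimeException: something unrelated"
val expectedMsgs = Seq("Could not connect to meta store", "Connection refused")

for (msg <- expectedMsgs) {
  exceptionString.contains(msg)          // result silently discarded: the test always passes
}

for (msg <- expectedMsgs) {
  assert(exceptionString.contains(msg))  // with the fix below, a missing message now fails the test
}
```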
Authored-by: Kent Yao Signed-off-by: Kent Yao (cherry picked from commit d0245d34c004935bb2c904bfd906836df3d574fa) Signed-off-by: Kent Yao --- .../spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala index af11b817d65b0..b8739ce56e41a 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala @@ -61,11 +61,10 @@ class HiveMetastoreLazyInitializationSuite extends SparkFunSuite { spark.sql("show tables") }) for (msg <- Seq( - "show tables", "Could not connect to meta store", "org.apache.thrift.transport.TTransportException", "Connection refused")) { - exceptionString.contains(msg) + assert(exceptionString.contains(msg)) } } finally { Thread.currentThread().setContextClassLoader(originalClassLoader) From 6838f0db692892fe5ffdd86e4a59a8e9733d5d1b Mon Sep 17 00:00:00 2001 From: zouxxyy Date: Thu, 28 Dec 2023 19:57:01 +0300 Subject: [PATCH 159/521] [SPARK-46535][SQL] Fix NPE when describe extended a column without col stats ### What changes were proposed in this pull request? ### Why are the changes needed? Currently executing DESCRIBE TABLE EXTENDED a column without col stats with v2 table will throw a null pointer exception. ```text Cannot invoke "org.apache.spark.sql.connector.read.colstats.ColumnStatistics.min()" because the return value of "scala.Option.get()" is null java.lang.NullPointerException: Cannot invoke "org.apache.spark.sql.connector.read.colstats.ColumnStatistics.min()" because the return value of "scala.Option.get()" is null at org.apache.spark.sql.execution.datasources.v2.DescribeColumnExec.run(DescribeColumnExec.scala:63) at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result$lzycompute(V2CommandExec.scala:43) at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.result(V2CommandExec.scala:43) at org.apache.spark.sql.execution.datasources.v2.V2CommandExec.executeCollect(V2CommandExec.scala:49) at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:118) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$6(SQLExecution.scala:150) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:241) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId0$1(SQLExecution.scala:116) at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:918) ``` This RP will fix it ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Add a new test `describe extended (formatted) a column without col stats` ### Was this patch authored or co-authored using generative AI tooling? Closes #44524 from Zouxxyy/dev/fix-stats. 
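The one-line fix below is the classic `Some` vs `Option` distinction when wrapping a possibly-null result from a Java map; a standalone sketch of the difference:

```scala
import java.util.{HashMap => JHashMap}

val columnStats = new JHashMap[String, AnyRef]()  // stand-in for the connector's column-stats map
val stats = columnStats.get("no_such_column")     // Java maps return null for absent keys

Some(stats)    // Some(null): the later `.get` yields null and calling `.min()` on it throws the NPE
Option(stats)  // None: the "no column statistics" branch is taken instead
```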
Lead-authored-by: zouxxyy Co-authored-by: Kent Yao Signed-off-by: Max Gekk (cherry picked from commit af8228ce9aee99eae9d08dbdefaaad32cf5438eb) Signed-off-by: Max Gekk --- .../datasources/v2/DescribeColumnExec.scala | 2 +- .../command/v2/DescribeTableSuite.scala | 21 +++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala index 61ccda3fc9543..2683d8d547f00 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeColumnExec.scala @@ -53,7 +53,7 @@ case class DescribeColumnExec( read.newScanBuilder(CaseInsensitiveStringMap.empty()).build() match { case s: SupportsReportStatistics => val stats = s.estimateStatistics() - Some(stats.columnStats().get(FieldReference.column(column.name))) + Option(stats.columnStats().get(FieldReference.column(column.name))) case _ => None } case _ => None diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala index e2f2aee56115f..a21baebe24d8f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala @@ -175,4 +175,25 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase Row("max_col_len", "NULL"))) } } + + test("SPARK-46535: describe extended (formatted) a column without col stats") { + withNamespaceAndTable("ns", "tbl") { tbl => + sql( + s""" + |CREATE TABLE $tbl + |(key INT COMMENT 'column_comment', col STRING) + |$defaultUsing""".stripMargin) + + val descriptionDf = sql(s"DESCRIBE TABLE EXTENDED $tbl key") + assert(descriptionDf.schema.map(field => (field.name, field.dataType)) === Seq( + ("info_name", StringType), + ("info_value", StringType))) + QueryTest.checkAnswer( + descriptionDf, + Seq( + Row("col_name", "key"), + Row("data_type", "int"), + Row("comment", "column_comment"))) + } + } } From f0e5fc973c1c87dbe1b0574e5d14e97b55abfa03 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 2 Jan 2024 08:13:29 -0800 Subject: [PATCH 160/521] [SPARK-46562][SQL] Remove retrieval of `keytabFile` from `UserGroupInformation` in `HiveAuthFactory` ### What changes were proposed in this pull request? This pr removed the retrieval of `keytabFile` from `UserGroupInformation` in `HiveAuthFactory` because `keytabFile` no longer exists in `UserGroupInformation` after Hadoop 3.0.3. Therefore, in `HiveAuthFactory`, `keytabFile` will always be null and in `HiveAuthFactory`, `keytabFile` will only be used when it is not null. For the specific changes in Hadoop, please refer to https://issues.apache.org/jira/browse/HADOOP-9747 | https://github.com/apache/hadoop/commit/59cf7588779145ad5850ad63426743dfe03d8347. ### Why are the changes needed? Clean up the invalid code. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions ### Was this patch authored or co-authored using generative AI tooling? No Closes #44557 from LuciferYang/remove-keytabFile. 
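For readers wondering why the removal is safe, here is a small sketch of the reflective lookup the removed code performed. On Hadoop 3.0.3+ the private field no longer exists, so the lookup always fails and the `keytabFile != null` branch was unreachable (the sketch assumes `hadoop-common` on the classpath):

```scala
import java.lang.reflect.Field

import org.apache.hadoop.security.UserGroupInformation

val keytabFileField: Option[Field] =
  try {
    Some(classOf[UserGroupInformation].getDeclaredField("keytabFile"))
  } catch {
    case _: NoSuchFieldException => None  // what happens on Hadoop 3.0.3 and later
  }
// keytabFileField is None, so any code guarded by `keytabFile != null` could never run.
```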
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun (cherry picked from commit bc7e949cf99382ecf70d5b59fca9e7e415fbbb48) Signed-off-by: Dongjoon Hyun --- .../apache/hive/service/auth/HiveAuthFactory.java | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java index 8d77b238ff41f..e3316cef241c3 100644 --- a/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java +++ b/sql/hive-thriftserver/src/main/java/org/apache/hive/service/auth/HiveAuthFactory.java @@ -17,7 +17,6 @@ package org.apache.hive.service.auth; import java.io.IOException; -import java.lang.reflect.Field; import java.lang.reflect.Method; import java.util.HashMap; import java.util.Map; @@ -85,18 +84,9 @@ public String getAuthName() { public static final String HS2_PROXY_USER = "hive.server2.proxy.user"; public static final String HS2_CLIENT_TOKEN = "hiveserver2ClientToken"; - private static Field keytabFile = null; private static Method getKeytab = null; static { Class clz = UserGroupInformation.class; - try { - keytabFile = clz.getDeclaredField("keytabFile"); - keytabFile.setAccessible(true); - } catch (NoSuchFieldException nfe) { - LOG.debug("Cannot find private field \"keytabFile\" in class: " + - UserGroupInformation.class.getCanonicalName(), nfe); - keytabFile = null; - } try { getKeytab = clz.getDeclaredMethod("getKeytab"); @@ -347,9 +337,7 @@ public static boolean needUgiLogin(UserGroupInformation ugi, String principal, S private static String getKeytabFromUgi() { synchronized (UserGroupInformation.class) { try { - if (keytabFile != null) { - return (String) keytabFile.get(null); - } else if (getKeytab != null) { + if (getKeytab != null) { return (String) getKeytab.invoke(UserGroupInformation.getCurrentUser()); } else { return null; From 6e8dbacf8a1402878a2a4be295bbe78e7c78327e Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Fri, 15 Dec 2023 17:52:46 -0800 Subject: [PATCH 161/521] [SPARK-46425][INFRA] Pin the bundler version in CI Currently documentation build is broken: https://github.com/apache/spark/actions/runs/7226413850/job/19691970695 ``` ... ERROR: Error installing bundler: The last version of bundler (>= 0) to support your Ruby & RubyGems was 2.4.22. Try installing it with `gem install bundler -v 2.4.22` bundler requires Ruby version >= 3.0.0. The current ruby version is 2.7.0.0. ``` This PR uses the suggestion. To recover the CI. No, dev-only. CI in this PR verify it. No. Closes #44376 from HyukjinKwon/SPARK-46425. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon (cherry picked from commit d0da1172b7d87b68a8af8464c6486aa586324241) Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index f202a7d49c9a2..32f6a44102bf9 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -687,7 +687,7 @@ jobs: Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'e1071', 'roxygen2', 'ggplot2', 'mvtnorm', 'statmod'), repos='https://cloud.r-project.org/')" Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" - gem install bundler + gem install bundler -v 2.4.22 cd docs bundle install - name: R linter From 2891d92e9d8a5050f457bb116530d46de3babf97 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 3 Jan 2024 05:54:57 -0800 Subject: [PATCH 162/521] [SPARK-46577][SQL] HiveMetastoreLazyInitializationSuite leaks hive's SessionState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? The upcoming tests with the new hive configurations will have no effect due to the leaked SessionState. ``` 06:21:12.848 pool-1-thread-1 INFO ThriftServerWithSparkContextInHttpSuite: Trying to start HiveThriftServer2: mode=http, attempt=0 .... 06:21:12.851 pool-1-thread-1 INFO AbstractService: Service:OperationManager is inited. 06:21:12.851 pool-1-thread-1 INFO AbstractService: Service:SessionManager is inited. 06:21:12.851 pool-1-thread-1 INFO AbstractService: Service: CLIService is inited. 06:21:12.851 pool-1-thread-1 INFO AbstractService: Service:ThriftBinaryCLIService is inited. 06:21:12.851 pool-1-thread-1 INFO AbstractService: Service: HiveServer2 is inited. 06:21:12.851 pool-1-thread-1 INFO AbstractService: Service:OperationManager is started. 06:21:12.851 pool-1-thread-1 INFO AbstractService: Service:SessionManager is started. 06:21:12.851 pool-1-thread-1 INFO AbstractService: Service: CLIService is started. 06:21:12.852 pool-1-thread-1 INFO AbstractService: Service:ThriftBinaryCLIService is started. 06:21:12.852 pool-1-thread-1 INFO ThriftCLIService: Starting ThriftBinaryCLIService on port 10000 with 5...500 worker threads 06:21:12.852 pool-1-thread-1 INFO AbstractService: Service:HiveServer2 is started. ``` As the logs above revealed, ThriftServerWithSparkContextInHttpSuite started the ThriftBinaryCLIService instead of the ThriftHttpCLIService. This is because in HiveClientImpl, the new configurations are only applied to hive conf during initializing but not for existing ones. This cause ThriftServerWithSparkContextInHttpSuite retrying or even aborting. ### Why are the changes needed? Fix flakiness in tests ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? ran tests locally with the hive-thriftserver module locally, ### Was this patch authored or co-authored using generative AI tooling? no Closes #44578 from yaooqinn/SPARK-46577. 
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun (cherry picked from commit 605fecd22cc18fc9b93fb26d4aa6088f5a314f92) Signed-off-by: Dongjoon Hyun --- .../sql/hive/HiveMetastoreLazyInitializationSuite.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala index b8739ce56e41a..cb85993e5e099 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/HiveMetastoreLazyInitializationSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive +import org.apache.hadoop.hive.ql.metadata.Hive +import org.apache.hadoop.hive.ql.session.SessionState import org.apache.logging.log4j.LogManager import org.apache.logging.log4j.core.Logger @@ -69,6 +71,10 @@ class HiveMetastoreLazyInitializationSuite extends SparkFunSuite { } finally { Thread.currentThread().setContextClassLoader(originalClassLoader) spark.sparkContext.setLogLevel(originalLevel.toString) + SparkSession.clearActiveSession() + SparkSession.clearDefaultSession() + SessionState.detachSession() + Hive.closeCurrent() spark.stop() } } From fb90ade2c7390077d2755fc43b73e63f5cf44f21 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 3 Jan 2024 12:07:15 -0800 Subject: [PATCH 163/521] [SPARK-46546][DOCS] Fix the formatting of tables in `running-on-yarn` pages ### What changes were proposed in this pull request? The pr aims to fix the formatting of tables in `running-on-yarn` pages. ### Why are the changes needed? Make the tables on the page display normally. Before: image After: image ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually check. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44540 from panbingkun/SPARK-46546. Authored-by: panbingkun Signed-off-by: Dongjoon Hyun (cherry picked from commit 85b44ccef4c4aeec302c12e03833590c7d8d6b9e) Signed-off-by: Dongjoon Hyun --- docs/running-on-yarn.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 9b4e59a119eeb..ce7121b806cb0 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -866,7 +866,7 @@ to avoid garbage collection issues during shuffle. The following extra configuration options are available when the shuffle service is running on YARN:
diff --git a/docs/sql-performance-tuning.md b/docs/sql-performance-tuning.md index 1467409bb500d..2dec65cc553ed 100644 --- a/docs/sql-performance-tuning.md +++ b/docs/sql-performance-tuning.md @@ -34,7 +34,7 @@ memory usage and GC pressure. You can call `spark.catalog.uncacheTable("tableNam Configuration of in-memory caching can be done using the `setConf` method on `SparkSession` or by running `SET key=value` commands using SQL. - +
@@ -62,7 +62,7 @@ Configuration of in-memory caching can be done using the `setConf` method on `Sp The following options can also be used to tune the performance of query execution. It is possible that these options will be deprecated in future release as more optimizations are performed automatically. -
Property NameDefaultMeaningSince Version
spark.sql.inMemoryColumnarStorage.compressed
+
@@ -253,7 +253,7 @@ Adaptive Query Execution (AQE) is an optimization technique in Spark SQL that ma ### Coalescing Post Shuffle Partitions This feature coalesces the post shuffle partitions based on the map output statistics when both `spark.sql.adaptive.enabled` and `spark.sql.adaptive.coalescePartitions.enabled` configurations are true. This feature simplifies the tuning of shuffle partition number when running queries. You do not need to set a proper shuffle partition number to fit your dataset. Spark can pick the proper shuffle partition number at runtime once you set a large enough initial number of shuffle partitions via `spark.sql.adaptive.coalescePartitions.initialPartitionNum` configuration. -
Property NameDefaultMeaningSince Version
spark.sql.files.maxPartitionBytes
+
@@ -298,7 +298,7 @@ This feature coalesces the post shuffle partitions based on the map output stati
Property NameDefaultMeaningSince Version
spark.sql.adaptive.coalescePartitions.enabled
### Spliting skewed shuffle partitions - +
@@ -320,7 +320,7 @@ This feature coalesces the post shuffle partitions based on the map output stati ### Converting sort-merge join to broadcast join AQE converts sort-merge join to broadcast hash join when the runtime statistics of any join side is smaller than the adaptive broadcast hash join threshold. This is not as efficient as planning a broadcast hash join in the first place, but it's better than keep doing the sort-merge join, as we can save the sorting of both the join sides, and read shuffle files locally to save network traffic(if `spark.sql.adaptive.localShuffleReader.enabled` is true) -
Property NameDefaultMeaningSince Version
spark.sql.adaptive.optimizeSkewsInRebalancePartitions.enabled
+
@@ -342,7 +342,7 @@ AQE converts sort-merge join to broadcast hash join when the runtime statistics ### Converting sort-merge join to shuffled hash join AQE converts sort-merge join to shuffled hash join when all post shuffle partitions are smaller than a threshold, the max threshold can see the config `spark.sql.adaptive.maxShuffledHashJoinLocalMapThreshold`. -
Property NameDefaultMeaningSince Version
spark.sql.adaptive.autoBroadcastJoinThreshold
+
@@ -356,7 +356,7 @@ AQE converts sort-merge join to shuffled hash join when all post shuffle partiti ### Optimizing Skew Join Data skew can severely downgrade the performance of join queries. This feature dynamically handles skew in sort-merge join by splitting (and replicating if needed) skewed tasks into roughly evenly sized tasks. It takes effect when both `spark.sql.adaptive.enabled` and `spark.sql.adaptive.skewJoin.enabled` configurations are enabled. -
Property NameDefaultMeaningSince Version
spark.sql.adaptive.maxShuffledHashJoinLocalMapThreshold
+
@@ -393,7 +393,7 @@ Data skew can severely downgrade the performance of join queries. This feature d
Property NameDefaultMeaningSince Version
spark.sql.adaptive.skewJoin.enabled
### Misc - +
diff --git a/docs/storage-openstack-swift.md b/docs/storage-openstack-swift.md index 73b21a1f7c27b..5b30786bdd7f9 100644 --- a/docs/storage-openstack-swift.md +++ b/docs/storage-openstack-swift.md @@ -60,7 +60,7 @@ required by Keystone. The following table contains a list of Keystone mandatory parameters. PROVIDER can be any (alphanumeric) name. -
Property NameDefaultMeaningSince Version
spark.sql.adaptive.optimizer.excludedRules
+
diff --git a/docs/streaming-custom-receivers.md b/docs/streaming-custom-receivers.md index 591a4415bb1a5..11a52232510fd 100644 --- a/docs/streaming-custom-receivers.md +++ b/docs/streaming-custom-receivers.md @@ -243,7 +243,7 @@ interval in the [Spark Streaming Programming Guide](streaming-programming-guide. The following table summarizes the characteristics of both types of receivers -
Property NameMeaningRequired
fs.swift.service.PROVIDER.auth.url
+
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md index f8f98ca54425d..4b93fb7c89ad1 100644 --- a/docs/streaming-programming-guide.md +++ b/docs/streaming-programming-guide.md @@ -433,7 +433,7 @@ Streaming core artifact `spark-streaming-xyz_{{site.SCALA_BINARY_VERSION}}` to the dependencies. For example, some of the common ones are as follows. -
Receiver Type
+
@@ -820,7 +820,7 @@ Similar to that of RDDs, transformations allow the data from the input DStream t DStreams support many of the transformations available on normal Spark RDD's. Some of the common ones are as follows. -
SourceArtifact
Kafka spark-streaming-kafka-0-10_{{site.SCALA_BINARY_VERSION}}
Kinesis
spark-streaming-kinesis-asl_{{site.SCALA_BINARY_VERSION}} [Amazon Software License]
+
@@ -1109,7 +1109,7 @@ JavaPairDStream windowedWordCounts = pairs.reduceByKeyAndWindow Some of the common window operations are as follows. All of these operations take the said two parameters - windowLength and slideInterval. -
TransformationMeaning
map(func)
+
@@ -1280,7 +1280,7 @@ Since the output operations actually allow the transformed data to be consumed b they trigger the actual execution of all the DStream transformations (similar to actions for RDDs). Currently, the following output operations are defined: -
TransformationMeaning
window(windowLength, slideInterval)
+
@@ -2485,7 +2485,7 @@ enabled](#deploying-applications) and reliable receivers, there is zero data los The following table summarizes the semantics under failures: -
Output OperationMeaning
print()
+
diff --git a/docs/structured-streaming-kafka-integration.md b/docs/structured-streaming-kafka-integration.md index 66e6efb1c8a9f..c5ffdf025b173 100644 --- a/docs/structured-streaming-kafka-integration.md +++ b/docs/structured-streaming-kafka-integration.md @@ -297,7 +297,7 @@ df.selectExpr("CAST(key AS STRING)", "CAST(value AS STRING)"); Each row in the source has the following schema: -
Deployment Scenario
+
@@ -336,7 +336,7 @@ Each row in the source has the following schema: The following options must be set for the Kafka source for both batch and streaming queries. -
ColumnType
key
+
@@ -368,7 +368,7 @@ for both batch and streaming queries. The following configurations are optional: -
Optionvaluemeaning
assign
+
@@ -607,7 +607,7 @@ The caching key is built up from the following information: The following properties are available to configure the consumer pool: -
Optionvaluedefaultquery typemeaning
startingTimestamp
+
@@ -657,7 +657,7 @@ Note that it doesn't leverage Apache Commons Pool due to the difference of chara The following properties are available to configure the fetched data pool: -
Property NameDefaultMeaningSince Version
spark.kafka.consumer.cache.capacity
+
@@ -685,7 +685,7 @@ solution to remove duplicates when reading the written data could be to introduc that can be used to perform de-duplication when reading. The Dataframe being written to Kafka should have the following columns in schema: -
Property NameDefaultMeaningSince Version
spark.kafka.consumer.fetchedData.cache.timeout
+
@@ -725,7 +725,7 @@ will be used. The following options must be set for the Kafka sink for both batch and streaming queries. -
ColumnType
key (optional)
+
@@ -736,7 +736,7 @@ for both batch and streaming queries. The following configurations are optional: -
Optionvaluemeaning
kafka.bootstrap.servers
+
@@ -912,7 +912,7 @@ It will use different Kafka producer when delegation token is renewed; Kafka pro The following properties are available to configure the producer pool: -
Optionvaluedefaultquery typemeaning
topic
+
@@ -1039,7 +1039,7 @@ When none of the above applies then unsecure connection assumed. Delegation tokens can be obtained from multiple clusters and ${cluster} is an arbitrary unique identifier which helps to group different configurations. -
Property NameDefaultMeaningSince Version
spark.kafka.producer.cache.timeout
+
diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 3e87c45a34915..845f0617898b4 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -545,7 +545,7 @@ checkpointed offsets after a failure. See the earlier section on [fault-tolerance semantics](#fault-tolerance-semantics). Here are the details of all the sources in Spark. -
Property NameDefaultMeaningSince Version
spark.kafka.clusters.${cluster}.auth.bootstrap.servers
+
@@ -1819,7 +1819,7 @@ regarding watermark delays and whether data will be dropped or not. ##### Support matrix for joins in streaming queries -
Source
+
@@ -2307,7 +2307,7 @@ to `org.apache.spark.sql.execution.streaming.state.RocksDBStateStoreProvider`. Here are the configs regarding to RocksDB instance of the state store provider: -
Left Input
+
@@ -2474,7 +2474,7 @@ More information to be added in future releases. Different types of streaming queries support different output modes. Here is the compatibility matrix. -
Config Name
+
@@ -2613,7 +2613,7 @@ meant for debugging purposes only. See the earlier section on [fault-tolerance semantics](#fault-tolerance-semantics). Here are the details of all the sinks in Spark. -
Query Type
+
@@ -3201,7 +3201,7 @@ The trigger settings of a streaming query define the timing of streaming data pr the query is going to be executed as micro-batch query with a fixed batch interval or as a continuous processing query. Here are the different kinds of triggers that are supported. -
Sink
+
diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md index becdfb4b18f5d..4821f883eef9d 100644 --- a/docs/submitting-applications.md +++ b/docs/submitting-applications.md @@ -159,7 +159,7 @@ export HADOOP_CONF_DIR=XXX The master URL passed to Spark can be in one of the following formats: -
Trigger Type
+
diff --git a/docs/web-ui.md b/docs/web-ui.md index 079bc6137f020..cdf62e0d8ec0b 100644 --- a/docs/web-ui.md +++ b/docs/web-ui.md @@ -380,7 +380,7 @@ operator shows the number of bytes written by a shuffle. Here is the list of SQL metrics: -
Master URLMeaning
local Run Spark locally with one worker thread (i.e. no parallelism at all).
local[K] Run Spark locally with K worker threads (ideally, set this to the number of cores on your machine).
+
From 53e2e7bdd618e2a7dec5a84b9d5ae965fb136179 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Fri, 1 Dec 2023 10:28:33 +0800 Subject: [PATCH 134/521] [SPARK-46189][PS][SQL] Perform comparisons and arithmetic between same types in various Pandas aggregate functions to avoid interpreted mode errors ### What changes were proposed in this pull request? In various Pandas aggregate functions, remove each comparison or arithmetic operation between `DoubleType` and `IntergerType` in `evaluateExpression` and replace with a comparison or arithmetic operation between `DoubleType` and `DoubleType`. Affected functions are `PandasStddev`, `PandasVariance`, `PandasSkewness`, `PandasKurtosis`, and `PandasCovar`. ### Why are the changes needed? These functions fail in interpreted mode. For example, `evaluateExpression` in `PandasKurtosis` compares a double to an integer: ``` If(n < 4, Literal.create(null, DoubleType) ... ``` This results in a boxed double and a boxed integer getting passed to `SQLOrderingUtil.compareDoubles` which expects two doubles as arguments. The scala runtime tries to unbox the boxed integer as a double, resulting in an error. Reproduction example: ``` spark.sql("set spark.sql.codegen.wholeStage=false") spark.sql("set spark.sql.codegen.factoryMode=NO_CODEGEN") import numpy as np import pandas as pd import pyspark.pandas as ps pser = pd.Series([1, 2, 3, 7, 9, 8], index=np.random.rand(6), name="a") psser = ps.from_pandas(pser) psser.kurt() ``` See Jira (SPARK-46189) for the other reproduction cases. This works fine in codegen mode because the integer is already unboxed and the Java runtime will implictly cast it to a double. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New unit tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44099 from bersprockets/unboxing_error. 
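A standalone Scala illustration of the failure mode described above (this is not Spark code; the boxed arguments stand in for what the interpreted, non-codegen projection ends up passing to `SQLOrderingUtil.compareDoubles`):

```scala
// In interpreted mode both operands arrive boxed. Unboxing a java.lang.Integer as a Double
// fails, which is what happened when a DoubleType expression was compared against an
// IntegerType literal such as `n < 4`.
def compareAsDoubles(x: AnyRef, y: AnyRef): Int =
  java.lang.Double.compare(x.asInstanceOf[Double], y.asInstanceOf[Double])

compareAsDoubles(java.lang.Double.valueOf(6.0), java.lang.Double.valueOf(4.0))
// fine: returns 1, matching the fixed comparison `n < 4.0`

compareAsDoubles(java.lang.Double.valueOf(6.0), java.lang.Integer.valueOf(4))
// java.lang.ClassCastException: java.lang.Integer cannot be cast to java.lang.Double
```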
Authored-by: Bruce Robbins Signed-off-by: Ruifeng Zheng (cherry picked from commit 042d8546be5d160e203ad78a8aa2e12e74142338) Signed-off-by: Ruifeng Zheng --- .../aggregate/CentralMomentAgg.scala | 16 ++-- .../expressions/aggregate/Covariance.scala | 2 +- .../aggregate/CentralMomentAggSuite.scala | 77 +++++++++++++++++++ .../aggregate/CovarianceAggSuite.scala | 39 ++++++++++ .../DeclarativeAggregateEvaluator.scala | 10 +-- .../aggregate/TestWithAndWithoutCodegen.scala | 35 +++++++++ 6 files changed, 165 insertions(+), 14 deletions(-) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAggSuite.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CovarianceAggSuite.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/TestWithAndWithoutCodegen.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala index 133a39d987459..316cb9e0bbc34 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAgg.scala @@ -353,7 +353,7 @@ case class PandasStddev( override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(n === ddof, divideByZeroEvalResult, sqrt(m2 / (n - ddof)))) + If(n === ddof.toDouble, divideByZeroEvalResult, sqrt(m2 / (n - ddof.toDouble)))) } override def prettyName: String = "pandas_stddev" @@ -375,7 +375,7 @@ case class PandasVariance( override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(n === ddof, divideByZeroEvalResult, m2 / (n - ddof))) + If(n === ddof.toDouble, divideByZeroEvalResult, m2 / (n - ddof.toDouble))) } override def prettyName: String = "pandas_variance" @@ -405,8 +405,8 @@ case class PandasSkewness(child: Expression) val _m2 = If(abs(m2) < 1e-14, Literal(0.0), m2) val _m3 = If(abs(m3) < 1e-14, Literal(0.0), m3) - If(n < 3, Literal.create(null, DoubleType), - If(_m2 === 0.0, Literal(0.0), sqrt(n - 1) * (n / (n - 2)) * _m3 / sqrt(_m2 * _m2 * _m2))) + If(n < 3.0, Literal.create(null, DoubleType), + If(_m2 === 0.0, Literal(0.0), sqrt(n - 1.0) * (n / (n - 2.0)) * _m3 / sqrt(_m2 * _m2 * _m2))) } override protected def withNewChildInternal(newChild: Expression): PandasSkewness = @@ -423,9 +423,9 @@ case class PandasKurtosis(child: Expression) override protected def momentOrder = 4 override val evaluateExpression: Expression = { - val adj = ((n - 1) / (n - 2)) * ((n - 1) / (n - 3)) * 3 - val numerator = n * (n + 1) * (n - 1) * m4 - val denominator = (n - 2) * (n - 3) * m2 * m2 + val adj = ((n - 1.0) / (n - 2.0)) * ((n - 1.0) / (n - 3.0)) * 3.0 + val numerator = n * (n + 1.0) * (n - 1.0) * m4 + val denominator = (n - 2.0) * (n - 3.0) * m2 * m2 // floating point error // @@ -436,7 +436,7 @@ case class PandasKurtosis(child: Expression) val _numerator = If(abs(numerator) < 1e-14, Literal(0.0), numerator) val _denominator = If(abs(denominator) < 1e-14, Literal(0.0), denominator) - If(n < 4, Literal.create(null, DoubleType), + If(n < 4.0, Literal.create(null, DoubleType), If(_denominator === 0.0, Literal(0.0), _numerator / _denominator - adj)) } diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala index ff31fb1128b9b..b392b603ab8d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Covariance.scala @@ -156,7 +156,7 @@ case class PandasCovar( override val evaluateExpression: Expression = { If(n === 0.0, Literal.create(null, DoubleType), - If(n === ddof, divideByZeroEvalResult, ck / (n - ddof))) + If(n === ddof.toDouble, divideByZeroEvalResult, ck / (n - ddof.toDouble))) } override def prettyName: String = "pandas_covar" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAggSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAggSuite.scala new file mode 100644 index 0000000000000..daf3ede0d0369 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CentralMomentAggSuite.scala @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.catalyst.expressions.aggregate + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.types.DoubleType + +class CentralMomentAggSuite extends TestWithAndWithoutCodegen { + val input = AttributeReference("input", DoubleType, nullable = true)() + + testBothCodegenAndInterpreted("SPARK-46189: pandas_kurtosis eval") { + val evaluator = DeclarativeAggregateEvaluator(PandasKurtosis(input), Seq(input)) + val buffer = evaluator.update( + InternalRow(1.0d), + InternalRow(2.0d), + InternalRow(3.0d), + InternalRow(7.0d), + InternalRow(9.0d), + InternalRow(8.0d)) + val result = evaluator.eval(buffer) + assert(result === InternalRow(-2.5772889417360285d)) + } + + testBothCodegenAndInterpreted("SPARK-46189: pandas_skew eval") { + val evaluator = DeclarativeAggregateEvaluator(PandasSkewness(input), Seq(input)) + val buffer = evaluator.update( + InternalRow(1.0d), + InternalRow(2.0d), + InternalRow(2.0d), + InternalRow(2.0d), + InternalRow(2.0d), + InternalRow(100.0d)) + val result = evaluator.eval(buffer) + assert(result === InternalRow(2.4489389171333733d)) + } + + testBothCodegenAndInterpreted("SPARK-46189: pandas_stddev eval") { + val evaluator = DeclarativeAggregateEvaluator(PandasStddev(input, 1), Seq(input)) + val buffer = evaluator.update( + InternalRow(1.0d), + InternalRow(2.0d), + InternalRow(3.0d), + InternalRow(7.0d), + InternalRow(9.0d), + InternalRow(8.0d)) + val result = evaluator.eval(buffer) + assert(result === InternalRow(3.40587727318528d)) + } + + testBothCodegenAndInterpreted("SPARK-46189: pandas_variance eval") { + val evaluator = DeclarativeAggregateEvaluator(PandasVariance(input, 1), Seq(input)) + val buffer = evaluator.update( + InternalRow(1.0d), + InternalRow(2.0d), + InternalRow(3.0d), + InternalRow(7.0d), + InternalRow(9.0d), + InternalRow(8.0d)) + val result = evaluator.eval(buffer) + assert(result === InternalRow(11.6d)) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CovarianceAggSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CovarianceAggSuite.scala new file mode 100644 index 0000000000000..2df053184c2b4 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/CovarianceAggSuite.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.sql.catalyst.expressions.aggregate + +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.AttributeReference +import org.apache.spark.sql.types.DoubleType + +class CovarianceAggSuite extends TestWithAndWithoutCodegen { + val a = AttributeReference("a", DoubleType, nullable = true)() + val b = AttributeReference("b", DoubleType, nullable = true)() + + testBothCodegenAndInterpreted("SPARK-46189: pandas_covar eval") { + val evaluator = DeclarativeAggregateEvaluator(PandasCovar(a, b, 1), Seq(a, b)) + val buffer = evaluator.update( + InternalRow(1.0d, 1.0d), + InternalRow(2.0d, 2.0d), + InternalRow(3.0d, 3.0d), + InternalRow(7.0d, 7.0d), + InternalRow(9.0, 9.0), + InternalRow(8.0d, 6.0)) + val result = evaluator.eval(buffer) + assert(result === InternalRow(10.4d)) + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala index b0f55b3b5c443..ac80e1419a99d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/DeclarativeAggregateEvaluator.scala @@ -17,24 +17,24 @@ package org.apache.spark.sql.catalyst.expressions.aggregate import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.expressions.{Attribute, JoinedRow, SafeProjection} +import org.apache.spark.sql.catalyst.expressions.{Attribute, JoinedRow, MutableProjection} /** * Evaluator for a [[DeclarativeAggregate]]. */ case class DeclarativeAggregateEvaluator(function: DeclarativeAggregate, input: Seq[Attribute]) { - lazy val initializer = SafeProjection.create(function.initialValues) + lazy val initializer = MutableProjection.create(function.initialValues) - lazy val updater = SafeProjection.create( + lazy val updater = MutableProjection.create( function.updateExpressions, function.aggBufferAttributes ++ input) - lazy val merger = SafeProjection.create( + lazy val merger = MutableProjection.create( function.mergeExpressions, function.aggBufferAttributes ++ function.inputAggBufferAttributes) - lazy val evaluator = SafeProjection.create( + lazy val evaluator = MutableProjection.create( function.evaluateExpression :: Nil, function.aggBufferAttributes) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/TestWithAndWithoutCodegen.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/TestWithAndWithoutCodegen.scala new file mode 100644 index 0000000000000..b43b160146eb4 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/TestWithAndWithoutCodegen.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.expressions.aggregate + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.internal.SQLConf + +trait TestWithAndWithoutCodegen extends SparkFunSuite with SQLHelper { + def testBothCodegenAndInterpreted(name: String)(f: => Unit): Unit = { + val modes = Seq(CodegenObjectFactoryMode.CODEGEN_ONLY, CodegenObjectFactoryMode.NO_CODEGEN) + for (fallbackMode <- modes) { + test(s"$name with $fallbackMode") { + withSQLConf(SQLConf.CODEGEN_FACTORY_MODE.key -> fallbackMode.toString) { + f + } + } + } + } +} From fde0fe676358fb3e9142d6895e4c7fc2e6604d5e Mon Sep 17 00:00:00 2001 From: wforget <643348094@qq.com> Date: Sat, 2 Dec 2023 11:25:54 -0800 Subject: [PATCH 135/521] [SPARK-45975][SQL][TESTS][3.5] Reset storeAssignmentPolicy to original ### What changes were proposed in this pull request? Reset storeAssignmentPolicy to original in HiveCompatibilitySuite. ### Why are the changes needed? STORE_ASSIGNMENT_POLICY was not reset in HiveCompatibilitySuite, causing subsequent test cases to fail. Details: https://github.com/wForget/spark/actions/runs/6902668865/job/18779862759 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions ### Was this patch authored or co-authored using generative AI tooling? No Closes #44126 from LuciferYang/SPARK-45943-FOLLOWUP. 
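For readers skimming the diff below, the two added lines implement the usual capture-and-restore pattern for a shared test conf. Restated here only as an illustrative sketch (the surrounding `afterAll` body is simplified, not patch content):

```
  // Capture the session's value before the suite overrides it elsewhere in setup ...
  private val originalStoreAssignmentPolicy =
    TestHive.conf.getConf(SQLConf.STORE_ASSIGNMENT_POLICY)

  // ... and put it back when the suite finishes, so later suites do not inherit the override.
  override def afterAll(): Unit = {
    try {
      TestHive.setConf(SQLConf.STORE_ASSIGNMENT_POLICY, originalStoreAssignmentPolicy)
    } finally {
      super.afterAll()
    }
  }
```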
Authored-by: wforget <643348094@qq.com> Signed-off-by: Dongjoon Hyun --- .../spark/sql/hive/execution/HiveCompatibilitySuite.scala | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index bd323dc4b24e1..0467603c01cd0 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -41,6 +41,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { private val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled private val originalSessionLocalTimeZone = TestHive.conf.sessionLocalTimeZone private val originalAnsiMode = TestHive.conf.getConf(SQLConf.ANSI_ENABLED) + private val originalStoreAssignmentPolicy = + TestHive.conf.getConf(SQLConf.STORE_ASSIGNMENT_POLICY) private val originalCreateHiveTable = TestHive.conf.getConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT) @@ -76,6 +78,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled) TestHive.setConf(SQLConf.SESSION_LOCAL_TIMEZONE, originalSessionLocalTimeZone) TestHive.setConf(SQLConf.ANSI_ENABLED, originalAnsiMode) + TestHive.setConf(SQLConf.STORE_ASSIGNMENT_POLICY, originalStoreAssignmentPolicy) TestHive.setConf(SQLConf.LEGACY_CREATE_HIVE_TABLE_BY_DEFAULT, originalCreateHiveTable) // For debugging dump some statistics about how much time was spent in various optimizer rules From 273ef5708fc33872cfe3091627617bbac8fdd56f Mon Sep 17 00:00:00 2001 From: Xingbo Jiang Date: Sun, 3 Dec 2023 22:08:20 -0800 Subject: [PATCH 136/521] [SPARK-46182][CORE] Track `lastTaskFinishTime` using the exact task finished event ### What changes were proposed in this pull request? We found a race condition between lastTaskRunningTime and lastShuffleMigrationTime that could lead to a decommissioned executor exit before all the shuffle blocks have been discovered. The issue could lead to immediate task retry right after an executor exit, thus longer query execution time. To fix the issue, we choose to update the lastTaskRunningTime only when a task updates its status to finished through the StatusUpdate event. This is better than the current approach (which use a thread to check for number of running tasks every second), because in this way we clearly know whether the shuffle block refresh happened after all tasks finished running or not, thus resolved the race condition mentioned above. ### Why are the changes needed? To fix a race condition that could lead to shuffle data lost, thus longer query execution time. ### How was this patch tested? This is a very subtle race condition that is hard to write a unit test using current unit test framework. And we are confident the change is low risk. Thus only verify by passing all the existing tests. ### Was this patch authored or co-authored using generative AI tooling? No Closes #44090 from jiangxb1987/SPARK-46182. 
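A minimal sketch of the ordering check this change makes reliable; names are simplified for illustration and this is not part of the patch itself:

```
import java.util.concurrent.atomic.AtomicLong

// Updated only from the StatusUpdate path, i.e. only once a task has truly finished.
val lastTaskFinishTime = new AtomicLong(System.nanoTime())

def onTaskFinished(): Unit = lastTaskFinishTime.set(System.nanoTime())

// The shuffle-migration snapshot can only be trusted if it was taken after the last task
// finished; otherwise that task may have written blocks the snapshot never saw.
def safeToExit(migrationTime: Long, allBlocksMigrated: Boolean, runningTasks: Int): Boolean =
  runningTasks == 0 && allBlocksMigrated && migrationTime > lastTaskFinishTime.get()
```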
Authored-by: Xingbo Jiang Signed-off-by: Dongjoon Hyun (cherry picked from commit 6f112f7b1a50a2b8a59952c69f67dd5f80ab6633) Signed-off-by: Dongjoon Hyun --- .../executor/CoarseGrainedExecutorBackend.scala | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index c695a9ec2851b..537522326fc78 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -21,7 +21,7 @@ import java.net.URL import java.nio.ByteBuffer import java.util.Locale import java.util.concurrent.ConcurrentHashMap -import java.util.concurrent.atomic.AtomicBoolean +import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong} import scala.util.{Failure, Success} import scala.util.control.NonFatal @@ -80,6 +80,10 @@ private[spark] class CoarseGrainedExecutorBackend( private var decommissioned = false + // Track the last time in ns that at least one task is running. If no task is running and all + // shuffle/RDD data migration are done, the decommissioned executor should exit. + private var lastTaskFinishTime = new AtomicLong(System.nanoTime()) + override def onStart(): Unit = { if (env.conf.get(DECOMMISSION_ENABLED)) { val signal = env.conf.get(EXECUTOR_DECOMMISSION_SIGNAL) @@ -269,6 +273,7 @@ private[spark] class CoarseGrainedExecutorBackend( val msg = StatusUpdate(executorId, taskId, state, data, cpus, resources) if (TaskState.isFinished(state)) { taskResources.remove(taskId) + lastTaskFinishTime.set(System.nanoTime()) } driver match { case Some(driverRef) => driverRef.send(msg) @@ -341,7 +346,6 @@ private[spark] class CoarseGrainedExecutorBackend( val shutdownThread = new Thread("wait-for-blocks-to-migrate") { override def run(): Unit = { - var lastTaskRunningTime = System.nanoTime() val sleep_time = 1000 // 1s // This config is internal and only used by unit tests to force an executor // to hang around for longer when decommissioned. @@ -358,7 +362,7 @@ private[spark] class CoarseGrainedExecutorBackend( val (migrationTime, allBlocksMigrated) = env.blockManager.lastMigrationInfo() // We can only trust allBlocksMigrated boolean value if there were no tasks running // since the start of computing it. - if (allBlocksMigrated && (migrationTime > lastTaskRunningTime)) { + if (allBlocksMigrated && (migrationTime > lastTaskFinishTime.get())) { logInfo("No running tasks, all blocks migrated, stopping.") exitExecutor(0, ExecutorLossMessage.decommissionFinished, notifyDriver = true) } else { @@ -370,12 +374,6 @@ private[spark] class CoarseGrainedExecutorBackend( } } else { logInfo(s"Blocked from shutdown by ${executor.numRunningTasks} running tasks") - // If there is a running task it could store blocks, so make sure we wait for a - // migration loop to complete after the last task is done. - // Note: this is only advanced if there is a running task, if there - // is no running task but the blocks are not done migrating this does not - // move forward. 
- lastTaskRunningTime = System.nanoTime() } Thread.sleep(sleep_time) } From 97472c91ed5660c5af862e8da99d44a1c24f2815 Mon Sep 17 00:00:00 2001 From: Johan Lasperas Date: Mon, 4 Dec 2023 08:58:03 -0800 Subject: [PATCH 137/521] [SPARK-46092][SQL][3.5] Don't push down Parquet row group filters that overflow This is a cherry-pick from https://github.com/apache/spark/pull/44006 to spark 3.5 ### What changes were proposed in this pull request? This change adds a check for overflows when creating Parquet row group filters on an INT32 (byte/short/int) parquet type to avoid incorrectly skipping row groups if the predicate value doesn't fit in an INT. This can happen if the read schema is specified as LONG, e.g via `.schema("col LONG")` While the Parquet readers don't support reading INT32 into a LONG, the overflow can lead to row groups being incorrectly skipped, bypassing the reader altogether and producing incorrect results instead of failing. ### Why are the changes needed? Reading a parquet file containing INT32 values with a read schema specified as LONG can produce incorrect results today: ``` Seq(0).toDF("a").write.parquet(path) spark.read.schema("a LONG").parquet(path).where(s"a < ${Long.MaxValue}").collect() ``` will return an empty result. The correct result is either: - Failing the query if the parquet reader doesn't support upcasting integers to longs (all parquet readers in Spark today) - Return result `[0]` if the parquet reader supports that upcast (no readers in Spark as of now, but I'm looking into adding this capability). ### Does this PR introduce _any_ user-facing change? The following: ``` Seq(0).toDF("a").write.parquet(path) spark.read.schema("a LONG").parquet(path).where(s"a < ${Long.MaxValue}").collect() ``` produces an (incorrect) empty result before this change. After this change, the read will fail, raising an error about the unsupported conversion from INT to LONG in the parquet reader. ### How was this patch tested? - Added tests to `ParquetFilterSuite` to ensure that no row group filter is created when the predicate value overflows or when the value type isn't compatible with the parquet type - Added test to `ParquetQuerySuite` covering the correctness issue described above. ### Was this patch authored or co-authored using generative AI tooling? No Closes #44154 from johanl-db/SPARK-46092-row-group-skipping-overflow-3.5. 
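A short worked example of why the bounds check is needed (illustrative only, not part of the patch): narrowing a Long that does not fit in an Int silently wraps around, so a row group filter built from it compares against the wrong value.

```
// Long.MaxValue does not fit in an Int; toInt keeps only the low 32 bits and yields -1.
assert(Long.MaxValue.toInt == -1)

// So pushing `a < Long.MaxValue` down as an INT32 filter would effectively become `a < -1`
// and skip a row group containing 0, producing the empty result described above. The guard
// only builds the filter when the value survives the round trip through Int:
def fitsInInt(v: Long): Boolean = v >= Int.MinValue && v <= Int.MaxValue
assert(fitsInInt(0L) && !fitsInInt(Long.MaxValue))
```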
Authored-by: Johan Lasperas Signed-off-by: Dongjoon Hyun --- .../datasources/parquet/ParquetFilters.scala | 10 ++- .../parquet/ParquetFilterSuite.scala | 71 +++++++++++++++++++ .../parquet/ParquetQuerySuite.scala | 20 ++++++ 3 files changed, 99 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala index 5899b6621ad8e..0983841dc8c2e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilters.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.execution.datasources.parquet -import java.lang.{Boolean => JBoolean, Double => JDouble, Float => JFloat, Long => JLong} +import java.lang.{Boolean => JBoolean, Byte => JByte, Double => JDouble, Float => JFloat, Long => JLong, Short => JShort} import java.math.{BigDecimal => JBigDecimal} import java.nio.charset.StandardCharsets.UTF_8 import java.sql.{Date, Timestamp} @@ -612,7 +612,13 @@ class ParquetFilters( value == null || (nameToParquetField(name).fieldType match { case ParquetBooleanType => value.isInstanceOf[JBoolean] case ParquetIntegerType if value.isInstanceOf[Period] => true - case ParquetByteType | ParquetShortType | ParquetIntegerType => value.isInstanceOf[Number] + case ParquetByteType | ParquetShortType | ParquetIntegerType => value match { + // Byte/Short/Int are all stored as INT32 in Parquet so filters are built using type Int. + // We don't create a filter if the value would overflow. + case _: JByte | _: JShort | _: Integer => true + case v: JLong => v.longValue() >= Int.MinValue && v.longValue() <= Int.MaxValue + case _ => false + } case ParquetLongType => value.isInstanceOf[JLong] || value.isInstanceOf[Duration] case ParquetFloatType => value.isInstanceOf[JFloat] case ParquetDoubleType => value.isInstanceOf[JDouble] diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 269a3efb7360c..8e88049f51e10 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.execution.datasources.parquet import java.io.File +import java.lang.{Double => JDouble, Float => JFloat, Long => JLong} import java.math.{BigDecimal => JBigDecimal} import java.nio.charset.StandardCharsets import java.sql.{Date, Timestamp} @@ -901,6 +902,76 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared } } + test("don't push down filters that would result in overflows") { + val schema = StructType(Seq( + StructField("cbyte", ByteType), + StructField("cshort", ShortType), + StructField("cint", IntegerType) + )) + + val parquetSchema = new SparkToParquetSchemaConverter(conf).convert(schema) + val parquetFilters = createParquetFilters(parquetSchema) + + for { + column <- Seq("cbyte", "cshort", "cint") + value <- Seq(JLong.MAX_VALUE, JLong.MIN_VALUE).map(JLong.valueOf) + } { + val filters = Seq( + sources.LessThan(column, value), + sources.LessThanOrEqual(column, value), + sources.GreaterThan(column, value), + sources.GreaterThanOrEqual(column, value), + 
sources.EqualTo(column, value), + sources.EqualNullSafe(column, value), + sources.Not(sources.EqualTo(column, value)), + sources.In(column, Array(value)) + ) + for (filter <- filters) { + assert(parquetFilters.createFilter(filter).isEmpty, + s"Row group filter $filter shouldn't be pushed down.") + } + } + } + + test("don't push down filters when value type doesn't match column type") { + val schema = StructType(Seq( + StructField("cbyte", ByteType), + StructField("cshort", ShortType), + StructField("cint", IntegerType), + StructField("clong", LongType), + StructField("cfloat", FloatType), + StructField("cdouble", DoubleType), + StructField("cboolean", BooleanType), + StructField("cstring", StringType), + StructField("cdate", DateType), + StructField("ctimestamp", TimestampType), + StructField("cbinary", BinaryType), + StructField("cdecimal", DecimalType(10, 0)) + )) + + val parquetSchema = new SparkToParquetSchemaConverter(conf).convert(schema) + val parquetFilters = createParquetFilters(parquetSchema) + + val filters = Seq( + sources.LessThan("cbyte", String.valueOf("1")), + sources.LessThan("cshort", JBigDecimal.valueOf(1)), + sources.LessThan("cint", JFloat.valueOf(JFloat.NaN)), + sources.LessThan("clong", String.valueOf("1")), + sources.LessThan("cfloat", JDouble.valueOf(1.0D)), + sources.LessThan("cdouble", JFloat.valueOf(1.0F)), + sources.LessThan("cboolean", String.valueOf("true")), + sources.LessThan("cstring", Integer.valueOf(1)), + sources.LessThan("cdate", Timestamp.valueOf("2018-01-01 00:00:00")), + sources.LessThan("ctimestamp", Date.valueOf("2018-01-01")), + sources.LessThan("cbinary", Integer.valueOf(1)), + sources.LessThan("cdecimal", Integer.valueOf(1234)) + ) + for (filter <- filters) { + assert(parquetFilters.createFilter(filter).isEmpty, + s"Row group filter $filter shouldn't be pushed down.") + } + } + test("SPARK-6554: don't push down predicates which reference partition columns") { import testImplicits._ diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala index 2e7b26126d24f..ea5444a1791fd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala @@ -1095,6 +1095,26 @@ abstract class ParquetQuerySuite extends QueryTest with ParquetTest with SharedS } } + test("row group skipping doesn't overflow when reading into larger type") { + withTempPath { path => + Seq(0).toDF("a").write.parquet(path.toString) + // The vectorized and non-vectorized readers will produce different exceptions, we don't need + // to test both as this covers row group skipping. + withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") { + // Reading integer 'a' as a long isn't supported. Check that an exception is raised instead + // of incorrectly skipping the single row group and producing incorrect results. 
+ val exception = intercept[SparkException] { + spark.read + .schema("a LONG") + .parquet(path.toString) + .where(s"a < ${Long.MaxValue}") + .collect() + } + assert(exception.getCause.getCause.isInstanceOf[SchemaColumnConvertNotSupportedException]) + } + } + } + test("SPARK-36825, SPARK-36852: create table with ANSI intervals") { withTable("tbl") { sql("create table tbl (c1 interval day, c2 interval year to month) using parquet") From 1321b4e64deaa1e58bf297c25b72319083056568 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 4 Dec 2023 14:41:27 -0800 Subject: [PATCH 138/521] [SPARK-46239][CORE] Hide `Jetty` info **What changes were proposed in this pull request?** The PR sets parameters to hide the version of jetty in spark. **Why are the changes needed?** It can avoid obtaining remote WWW service information through HTTP. **Does this PR introduce any user-facing change?** No **How was this patch tested?** Manual review **Was this patch authored or co-authored using generative AI tooling?** No Closes #44158 from chenyu-opensource/branch-SPARK-46239. Lead-authored-by: Dongjoon Hyun Co-authored-by: chenyu <119398199+chenyu-opensource@users.noreply.github.com> Signed-off-by: Dongjoon Hyun (cherry picked from commit ff4f59341215b7f3a87e6cd8798d49e25562fcd6) Signed-off-by: Dongjoon Hyun --- core/src/main/scala/org/apache/spark/ui/JettyUtils.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index 9582bdbf52641..21753361e627a 100644 --- a/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -312,6 +312,12 @@ private[spark] object JettyUtils extends Logging { logDebug(s"Using requestHeaderSize: $requestHeaderSize") httpConfig.setRequestHeaderSize(requestHeaderSize) + // Hide information. + logDebug("Using setSendServerVersion: false") + httpConfig.setSendServerVersion(false) + logDebug("Using setSendXPoweredBy: false") + httpConfig.setSendXPoweredBy(false) + // If SSL is configured, create the secure connector first. val securePort = sslOptions.createJettySslContextFactory().map { factory => val securePort = sslOptions.port.getOrElse(if (port > 0) Utils.userPort(port, 400) else 0) From b5cbe1fcdb464fc064ffb5fbef3edfa408d6638f Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Wed, 6 Dec 2023 10:46:31 -0800 Subject: [PATCH 139/521] [SPARK-46286][DOCS] Document `spark.io.compression.zstd.bufferPool.enabled` This PR adds spark.io.compression.zstd.bufferPool.enabled to documentation - Missing docs - https://github.com/apache/spark/pull/31502#issuecomment-774792276 potential regression no doc build no Closes #44207 from yaooqinn/SPARK-46286. 
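For reference, a minimal, illustrative way to toggle the newly documented flag (app name, master and value are placeholders, not a recommendation):

```
import org.apache.spark.sql.SparkSession

// Core I/O confs are read from SparkConf, so set this when building the session
// rather than with spark.conf.set at runtime.
val spark = SparkSession.builder()
  .appName("zstd-buffer-pool-check")
  .master("local[*]")
  .config("spark.io.compression.zstd.bufferPool.enabled", "false")
  .getOrCreate()
```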
Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun (cherry picked from commit 6b6980de451e655ef4b9f63d502b73c09a513d4c) Signed-off-by: Dongjoon Hyun --- docs/configuration.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/configuration.md b/docs/configuration.md index 248f9333c9a3b..f79406c5b6d89 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1752,6 +1752,14 @@ Apart from these, the following properties are also available, and may be useful + + + + + + From a697725d99a0177a2b1fbb0607e859ac10af1c4e Mon Sep 17 00:00:00 2001 From: Nick Young Date: Wed, 6 Dec 2023 15:20:19 -0800 Subject: [PATCH 140/521] [SPARK-46274][SQL] Fix Range operator computeStats() to check long validity before converting ### What changes were proposed in this pull request? Range operator's `computeStats()` function unsafely casts from `BigInt` to `Long` and causes issues downstream with statistics estimation. Adds bounds checking to avoid crashing. ### Why are the changes needed? Downstream statistics estimation will crash and fail loudly; to avoid this and help maintain clean code we should fix this. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? UT ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44191 from n-young-db/range-compute-stats. Authored-by: Nick Young Signed-off-by: Wenchen Fan (cherry picked from commit 9fd575ae46f8a4dbd7da18887a44c693d8788332) Signed-off-by: Wenchen Fan --- .../plans/logical/basicLogicalOperators.scala | 12 +++++++----- .../BasicStatsEstimationSuite.scala | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index b4d7716a566e4..58c03ee72d6dc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -1063,10 +1063,12 @@ case class Range( if (numElements == 0) { Statistics(sizeInBytes = 0, rowCount = Some(0)) } else { - val (minVal, maxVal) = if (step > 0) { - (start, start + (numElements - 1) * step) + val (minVal, maxVal) = if (!numElements.isValidLong) { + (None, None) + } else if (step > 0) { + (Some(start), Some(start + (numElements.toLong - 1) * step)) } else { - (start + (numElements - 1) * step, start) + (Some(start + (numElements.toLong - 1) * step), Some(start)) } val histogram = if (conf.histogramEnabled) { @@ -1077,8 +1079,8 @@ case class Range( val colStat = ColumnStat( distinctCount = Some(numElements), - max = Some(maxVal), - min = Some(minVal), + max = maxVal, + min = minVal, nullCount = Some(0), avgLen = Some(LongType.defaultSize), maxLen = Some(LongType.defaultSize), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala index 33e521eb65a57..d1276615c5faa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/BasicStatsEstimationSuite.scala @@ -176,6 +176,22 @@ class BasicStatsEstimationSuite extends PlanTest with StatsEstimationTestBase { expectedStatsCboOff = 
rangeStats, extraConfig) } +test("range with invalid long value") { + val numElements = BigInt(Long.MaxValue) - BigInt(Long.MinValue) + val range = Range(Long.MinValue, Long.MaxValue, 1, None) + val rangeAttrs = AttributeMap(range.output.map(attr => + (attr, ColumnStat( + distinctCount = Some(numElements), + nullCount = Some(0), + maxLen = Some(LongType.defaultSize), + avgLen = Some(LongType.defaultSize))))) + val rangeStats = Statistics( + sizeInBytes = numElements * 8, + rowCount = Some(numElements), + attributeStats = rangeAttrs) + checkStats(range, rangeStats, rangeStats) +} + test("windows") { val windows = plan.window(Seq(min(attribute).as("sum_attr")), Seq(attribute), Nil) val windowsStats = Statistics(sizeInBytes = plan.size.get * (4 + 4 + 8) / (4 + 8)) From dbb61981b804dbc03cf140c7c76653348e2ac740 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Wed, 6 Dec 2023 15:24:48 -0800 Subject: [PATCH 141/521] [SPARK-45580][SQL][3.5] Handle case where a nested subquery becomes an existence join ### What changes were proposed in this pull request? This is a back-port of #44193. In `RewritePredicateSubquery`, prune existence flags from the final join when `rewriteExistentialExpr` returns an existence join. This change prunes the flags (attributes with the name "exists") by adding a `Project` node. For example: ``` Join LeftSemi, ((a#13 = c1#15) OR exists#19) :- Join ExistenceJoin(exists#19), (a#13 = col1#17) : :- LocalRelation [a#13] : +- LocalRelation [col1#17] +- LocalRelation [c1#15] ``` becomes ``` Project [a#13] +- Join LeftSemi, ((a#13 = c1#15) OR exists#19) :- Join ExistenceJoin(exists#19), (a#13 = col1#17) : :- LocalRelation [a#13] : +- LocalRelation [col1#17] +- LocalRelation [c1#15] ``` This change always adds the `Project` node, whether `rewriteExistentialExpr` returns an existence join or not. In the case when `rewriteExistentialExpr` does not return an existence join, `RemoveNoopOperators` will remove the unneeded `Project` node. ### Why are the changes needed? This query returns an extraneous boolean column when run in spark-sql: ``` create or replace temp view t1(a) as values (1), (2), (3), (7); create or replace temp view t2(c1) as values (1), (2), (3); create or replace temp view t3(col1) as values (3), (9); select * from t1 where exists ( select c1 from t2 where a = c1 or a in (select col1 from t3) ); 1 false 2 false 3 true ``` (Note: the above query will not have the extraneous boolean column when run from the Dataset API. That is because the Dataset API truncates the rows based on the schema of the analyzed plan. The bug occurs during optimization). This query fails when run in either spark-sql or using the Dataset API: ``` select ( select * from t1 where exists ( select c1 from t2 where a = c1 or a in (select col1 from t3) ) limit 1 ) from range(1); java.lang.AssertionError: assertion failed: Expects 1 field, but got 2; something went wrong in analysis ``` ### Does this PR introduce _any_ user-facing change? No, except for the removal of the extraneous boolean flag and the fix to the error condition. ### How was this patch tested? New unit test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44215 from bersprockets/schema_change_br35. 
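The shape of the fix, restated outside the diff with comments (this mirrors the changed rule in `RewritePredicateSubquery` and adds no new patch content):

```
case (p, Exists(sub, _, _, conditions, subHint)) =>
  val (joinCond, outerPlan) = rewriteExistentialExpr(conditions, p)
  val join = buildJoin(outerPlan, sub, LeftSemi, joinCond, subHint)
  // Keep only the original output: this prunes any exists#N flag that
  // rewriteExistentialExpr introduced. When no flag was added,
  // RemoveNoopOperators later drops the extra Project.
  Project(p.output, join)
```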
Authored-by: Bruce Robbins Signed-off-by: Dongjoon Hyun --- .../sql/catalyst/optimizer/subquery.scala | 9 ++-- .../org/apache/spark/sql/SubquerySuite.scala | 46 +++++++++++++++++++ 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala index 91cd838ad617a..ee20053157816 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala @@ -118,16 +118,19 @@ object RewritePredicateSubquery extends Rule[LogicalPlan] with PredicateHelper { withSubquery.foldLeft(newFilter) { case (p, Exists(sub, _, _, conditions, subHint)) => val (joinCond, outerPlan) = rewriteExistentialExpr(conditions, p) - buildJoin(outerPlan, sub, LeftSemi, joinCond, subHint) + val join = buildJoin(outerPlan, sub, LeftSemi, joinCond, subHint) + Project(p.output, join) case (p, Not(Exists(sub, _, _, conditions, subHint))) => val (joinCond, outerPlan) = rewriteExistentialExpr(conditions, p) - buildJoin(outerPlan, sub, LeftAnti, joinCond, subHint) + val join = buildJoin(outerPlan, sub, LeftAnti, joinCond, subHint) + Project(p.output, join) case (p, InSubquery(values, ListQuery(sub, _, _, _, conditions, subHint))) => // Deduplicate conflicting attributes if any. val newSub = dedupSubqueryOnSelfJoin(p, sub, Some(values)) val inConditions = values.zip(newSub.output).map(EqualTo.tupled) val (joinCond, outerPlan) = rewriteExistentialExpr(inConditions ++ conditions, p) - Join(outerPlan, newSub, LeftSemi, joinCond, JoinHint(None, subHint)) + val join = Join(outerPlan, newSub, LeftSemi, joinCond, JoinHint(None, subHint)) + Project(p.output, join) case (p, Not(InSubquery(values, ListQuery(sub, _, _, _, conditions, subHint)))) => // This is a NULL-aware (left) anti join (NAAJ) e.g. col NOT IN expr // Construct the condition. 
A NULL in one of the conditions is regarded as a positive diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index a7a0f6156cb1d..fbc256b33968a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -2736,4 +2736,50 @@ class SubquerySuite extends QueryTest Row(1, "a", 3) :: Row(2, "a", 3) :: Row(3, "a", 3) :: Nil) } } + + test("SPARK-45580: Handle case where a nested subquery becomes an existence join") { + withTempView("t1", "t2", "t3") { + Seq((1), (2), (3), (7)).toDF("a").persist().createOrReplaceTempView("t1") + Seq((1), (2), (3)).toDF("c1").persist().createOrReplaceTempView("t2") + Seq((3), (9)).toDF("col1").persist().createOrReplaceTempView("t3") + + val query1 = + """ + |SELECT * + |FROM t1 + |WHERE EXISTS ( + | SELECT c1 + | FROM t2 + | WHERE a = c1 + | OR a IN (SELECT col1 FROM t3) + |)""".stripMargin + val df1 = sql(query1) + checkAnswer(df1, Row(1) :: Row(2) :: Row(3) :: Nil) + + val query2 = + """ + |SELECT * + |FROM t1 + |WHERE a IN ( + | SELECT c1 + | FROM t2 + | where a IN (SELECT col1 FROM t3) + |)""".stripMargin + val df2 = sql(query2) + checkAnswer(df2, Row(3)) + + val query3 = + """ + |SELECT * + |FROM t1 + |WHERE NOT EXISTS ( + | SELECT c1 + | FROM t2 + | WHERE a = c1 + | OR a IN (SELECT col1 FROM t3) + |)""".stripMargin + val df3 = sql(query3) + checkAnswer(df3, Row(7)) + } + } } From ab14430523473528bafa41d8f10bc33efbb74493 Mon Sep 17 00:00:00 2001 From: Raghu Angadi Date: Fri, 8 Dec 2023 16:40:27 +0900 Subject: [PATCH 142/521] [SPARK-46275] Protobuf: Return null in permissive mode when deserialization fails ### What changes were proposed in this pull request? This updates the the behavior of `from_protobuf()` built function when underlying record fails to deserialize. * **Current behvior**: * By default, this would throw an error and the query fails. [This part is not changed in the PR] * When `mode` is set to 'PERMISSIVE' it returns a non-null struct with each of the inner fields set to null e.g. `{ "field_a": null, "field_b": null }` etc. * This is not very convenient to the users. They don't know if this was due to malformed record or if the input itself has null. It is very hard to check for each field for null in SQL query (imagine a sql query with a struct that has 10 fields). * **New behavior** * When `mode` is set to 'PERMISSIVE' it simply returns `null`. ### Why are the changes needed? This makes it easier for users to detect and handle malformed records. ### Does this PR introduce _any_ user-facing change? Yes, but this does not change the contract. In fact, it clarifies it. ### How was this patch tested? - Unit tests are updated. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44214 from rangadi/protobuf-null. 
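A sketch of how callers can rely on the new behaviour; the DataFrame `events`, the `payload` column, the message name and the descriptor path are assumptions for illustration:

```
import org.apache.spark.sql.functions.col
import org.apache.spark.sql.protobuf.functions.from_protobuf

val options = new java.util.HashMap[String, String]()
options.put("mode", "PERMISSIVE")

val parsed = events.select(
  from_protobuf(col("payload"), "Event", "/path/to/descriptor.desc", options).as("event"))

// A malformed record now surfaces as a single null struct instead of a struct of nulls,
// so it can be isolated with a plain null check.
val malformed = parsed.filter(col("event").isNull)
```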
Authored-by: Raghu Angadi Signed-off-by: Hyukjin Kwon (cherry picked from commit 309c796876f310f8604292d84acc12e711ba7031) Signed-off-by: Hyukjin Kwon --- .../sql/protobuf/ProtobufDataToCatalyst.scala | 31 +++---------------- .../ProtobufCatalystDataConversionSuite.scala | 13 +------- 2 files changed, 6 insertions(+), 38 deletions(-) diff --git a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDataToCatalyst.scala b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDataToCatalyst.scala index 5c4a5ff068968..d2417674837be 100644 --- a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDataToCatalyst.scala +++ b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufDataToCatalyst.scala @@ -22,12 +22,12 @@ import scala.util.control.NonFatal import com.google.protobuf.DynamicMessage import com.google.protobuf.TypeRegistry -import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, SpecificInternalRow, UnaryExpression} +import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression, UnaryExpression} import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} import org.apache.spark.sql.catalyst.util.{FailFastMode, ParseMode, PermissiveMode} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.protobuf.utils.{ProtobufOptions, ProtobufUtils, SchemaConverters} -import org.apache.spark.sql.types.{AbstractDataType, BinaryType, DataType, StructType} +import org.apache.spark.sql.types.{AbstractDataType, BinaryType, DataType} private[sql] case class ProtobufDataToCatalyst( child: Expression, @@ -39,16 +39,8 @@ private[sql] case class ProtobufDataToCatalyst( override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType) - override lazy val dataType: DataType = { - val dt = SchemaConverters.toSqlType(messageDescriptor, protobufOptions).dataType - parseMode match { - // With PermissiveMode, the output Catalyst row might contain columns of null values for - // corrupt records, even if some of the columns are not nullable in the user-provided schema. - // Therefore we force the schema to be all nullable here. 
- case PermissiveMode => dt.asNullable - case _ => dt - } - } + override lazy val dataType: DataType = + SchemaConverters.toSqlType(messageDescriptor, protobufOptions).dataType override def nullable: Boolean = true @@ -87,22 +79,9 @@ private[sql] case class ProtobufDataToCatalyst( mode } - @transient private lazy val nullResultRow: Any = dataType match { - case st: StructType => - val resultRow = new SpecificInternalRow(st.map(_.dataType)) - for (i <- 0 until st.length) { - resultRow.setNullAt(i) - } - resultRow - - case _ => - null - } - private def handleException(e: Throwable): Any = { parseMode match { - case PermissiveMode => - nullResultRow + case PermissiveMode => null case FailFastMode => throw QueryExecutionErrors.malformedProtobufMessageDetectedInMessageParsingError(e) case _ => diff --git a/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufCatalystDataConversionSuite.scala b/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufCatalystDataConversionSuite.scala index b7f17fece5fa6..62d0efd7459b2 100644 --- a/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufCatalystDataConversionSuite.scala +++ b/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufCatalystDataConversionSuite.scala @@ -79,20 +79,9 @@ class ProtobufCatalystDataConversionSuite .eval() } - val expected = { - val expectedSchema = ProtobufUtils.buildDescriptor(descBytes, badSchema) - SchemaConverters.toSqlType(expectedSchema).dataType match { - case st: StructType => - Row.fromSeq((0 until st.length).map { _ => - null - }) - case _ => null - } - } - checkEvaluation( ProtobufDataToCatalyst(binary, badSchema, Some(descBytes), Map("mode" -> "PERMISSIVE")), - expected) + expected = null) } protected def prepareExpectedResult(expected: Any): Any = expected match { From 28a8b181e96d4ce71e2f9888910214d14a859b7d Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sat, 9 Dec 2023 15:20:55 -0800 Subject: [PATCH 143/521] [SPARK-46339][SS] Directory with batch number name should not be treated as metadata log ### What changes were proposed in this pull request? This patch updates the document of `CheckpointFileManager.list` method to reflect the fact it is used to return both files and directories to reduce confusion. For the usage like `HDFSMetadataLog` where it assumes returned file status by `list` are all files, we add a filter there to avoid confusing error. ### Why are the changes needed? `HDFSMetadataLog` takes a metadata path as parameter. When it goes to retrieves all batches metadata, it calls `CheckpointFileManager.list` to get all files under the metadata path. However, currently all implementations of `CheckpointFileManager.list` returns all files/directories under the given path. So if there is a dictionary with name of batch number (a long value), the directory will be returned too and cause trouble when `HDFSMetadataLog` goes to read it. Actually, `CheckpointFileManager.list` method clearly defines that it lists the "files" in a path. That's being said, current implementations don't follow the doc. We tried to make `list` method implementations only return files but some usage (state metadata) of `list` method already break the assumption and they use dictionaries returned by `list` method. So we simply update `list` method document to explicitly define it returns both files/dictionaries. We add a filter in `HDFSMetadataLog` on the file statuses returned by `list` method to avoid this issue. 
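A minimal sketch of the intended filtering, with simplified names (the real change lives in `HDFSMetadataLog.listBatches`):

```
import org.apache.hadoop.fs.{FileStatus, Path}

// Only plain files can be batch metadata; a stray directory named "0" must be ignored.
def batchIds(statuses: Array[FileStatus], pathToBatchId: Path => Long): Array[Long] =
  statuses.filter(_.isFile).map(s => pathToBatchId(s.getPath))
```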
### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added test ### Was this patch authored or co-authored using generative AI tooling? No Closes #44272 from viirya/fix_metadatalog. Authored-by: Liang-Chi Hsieh Signed-off-by: Dongjoon Hyun (cherry picked from commit 75805f07f5caeb01104a7352b02790d03a043ded) Signed-off-by: Dongjoon Hyun --- .../execution/streaming/CheckpointFileManager.scala | 4 ++-- .../sql/execution/streaming/HDFSMetadataLog.scala | 2 ++ .../execution/streaming/HDFSMetadataLogSuite.scala | 12 ++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala index ad3212871fc94..677e2fccb6b48 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/CheckpointFileManager.scala @@ -65,10 +65,10 @@ trait CheckpointFileManager { /** Open a file for reading, or throw exception if it does not exist. */ def open(path: Path): FSDataInputStream - /** List the files in a path that match a filter. */ + /** List the files/directories in a path that match a filter. */ def list(path: Path, filter: PathFilter): Array[FileStatus] - /** List all the files in a path. */ + /** List all the files/directories in a path. */ def list(path: Path): Array[FileStatus] = { list(path, (_: Path) => true) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala index 2b0172bb9555c..9a811db679d01 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala @@ -325,6 +325,8 @@ class HDFSMetadataLog[T <: AnyRef : ClassTag](sparkSession: SparkSession, path: /** List the available batches on file system. */ protected def listBatches: Array[Long] = { val batchIds = fileManager.list(metadataPath, batchFilesFilter) + // Batches must be files + .filter(f => f.isFile) .map(f => pathToBatchId(f.getPath)) ++ // Iterate over keySet is not thread safe. We call `toArray` to make a copy in the lock to // elimiate the race condition. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala index 980d532dd4779..08f245135f589 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLogSuite.scala @@ -33,6 +33,18 @@ class HDFSMetadataLogSuite extends SharedSparkSession { private implicit def toOption[A](a: A): Option[A] = Option(a) + test("SPARK-46339: Directory with number name should not be treated as metadata log") { + withTempDir { temp => + val dir = new File(temp, "dir") + val metadataLog = new HDFSMetadataLog[String](spark, dir.getAbsolutePath) + assert(metadataLog.metadataPath.toString.endsWith("/dir")) + + // Create a directory with batch id 0 + new File(dir, "0").mkdir() + assert(metadataLog.getLatest() === None) + } + } + test("HDFSMetadataLog: basic") { withTempDir { temp => val dir = new File(temp, "dir") // use non-existent directory to test whether log make the dir From cbaefe9cc6a22c940728b6717aeaa51c7d550ddc Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Sun, 10 Dec 2023 14:03:37 -0800 Subject: [PATCH 144/521] [SPARK-45969][DOCS] Document configuration change of executor failure tracker It's a follow-up of SPARK-41210 (use a new JIRA ticket because it was released in 3.5.0), this PR updates docs/migration guide about configuration change of executor failure tracker Docs update is missing in previous changes, also is requested https://github.com/apache/spark/commit/40872e9a094f8459b0b6f626937ced48a8d98efb#r132516892 by tgravescs Yes, docs changed Review No Closes #43863 from pan3793/SPARK-45969. Authored-by: Cheng Pan Signed-off-by: Dongjoon Hyun (cherry picked from commit 7a43de193aa5a0856e098088728dccea37f169c5) Signed-off-by: Dongjoon Hyun --- .../spark/internal/config/package.scala | 4 ++-- docs/configuration.md | 21 +++++++++++++++++++ docs/core-migration-guide.md | 6 ++++++ docs/running-on-yarn.md | 17 --------------- 4 files changed, 29 insertions(+), 19 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala index 600cbf151e17b..c5e23cae1f847 100644 --- a/core/src/main/scala/org/apache/spark/internal/config/package.scala +++ b/core/src/main/scala/org/apache/spark/internal/config/package.scala @@ -924,7 +924,7 @@ package object config { private[spark] val MAX_EXECUTOR_FAILURES = ConfigBuilder("spark.executor.maxNumFailures") - .doc("Spark exits if the number of failed executors exceeds this threshold. " + + .doc("The maximum number of executor failures before failing the application. " + "This configuration only takes effect on YARN, or Kubernetes when " + "`spark.kubernetes.allocation.pods.allocator` is set to 'direct'.") .version("3.5.0") @@ -933,7 +933,7 @@ package object config { private[spark] val EXECUTOR_ATTEMPT_FAILURE_VALIDITY_INTERVAL_MS = ConfigBuilder("spark.executor.failuresValidityInterval") - .doc("Interval after which Executor failures will be considered independent and not " + + .doc("Interval after which executor failures will be considered independent and not " + "accumulate towards the attempt count. 
This configuration only takes effect on YARN, " + "or Kubernetes when `spark.kubernetes.allocation.pods.allocator` is set to 'direct'.") .version("3.5.0") diff --git a/docs/configuration.md b/docs/configuration.md index f79406c5b6d89..645c3e8208abc 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -514,6 +514,27 @@ of the most common options to set are: + + + + + + + + + + + +
SQL metricsMeaningOperators
number of output rows the number of output rows of the operator Aggregate operators, Join operators, Sample, Range, Scan operators, Filter, etc.
data size the size of broadcast/shuffled/collected data of the operator BroadcastExchange, ShuffleExchange, Subquery
2.3.0
spark.io.compression.zstd.bufferPool.enabledtrue + If true, enable buffer pool of ZSTD JNI library. + 3.2.0
spark.kryo.classesToRegister (none)3.2.0
spark.executor.maxNumFailuresnumExecutors * 2, with minimum of 3 + The maximum number of executor failures before failing the application. + This configuration only takes effect on YARN, or Kubernetes when + `spark.kubernetes.allocation.pods.allocator` is set to 'direct'. + 3.5.0
spark.executor.failuresValidityInterval(none) + Interval after which executor failures will be considered independent and + not accumulate towards the attempt count. + This configuration only takes effect on YARN, or Kubernetes when + `spark.kubernetes.allocation.pods.allocator` is set to 'direct'. + 3.5.0
Apart from these, the following properties are also available, and may be useful in some situations: diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 3f97a484e1a68..36465cc3f4e86 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -22,6 +22,12 @@ license: | * Table of contents {:toc} +## Upgrading from Core 3.4 to 3.5 + +- Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. Use `spark.executor.failuresValidityInterval` instead. + +- Since Spark 3.5, `spark.yarn.max.executor.failures` is deprecated. Use `spark.executor.maxNumFailures` instead. + ## Upgrading from Core 3.3 to 3.4 - Since Spark 3.4, Spark driver will own `PersistentVolumnClaim`s and try to reuse if they are not assigned to live executors. To restore the behavior before Spark 3.4, you can set `spark.kubernetes.driver.ownPersistentVolumeClaim` to `false` and `spark.kubernetes.driver.reusePersistentVolumeClaim` to `false`. diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index d577b70a68039..9b4e59a119eeb 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -291,14 +291,6 @@ To use a custom metrics.properties for the application master and executors, upd
1.4.0
spark.yarn.max.executor.failuresnumExecutors * 2, with minimum of 3 - The maximum number of executor failures before failing the application. - 1.0.0
spark.yarn.historyServer.address (none) 3.3.0
spark.yarn.executor.failuresValidityInterval(none) - Defines the validity interval for executor failure tracking. - Executor failures which are older than the validity interval will be ignored. - 2.0.0
spark.yarn.submit.waitAppCompletion true
- + @@ -875,6 +875,7 @@ The following extra configuration options are available when the shuffle service initialization. This prevents application failures caused by running containers on NodeManagers where the Spark Shuffle Service is not running. + @@ -883,6 +884,7 @@ The following extra configuration options are available when the shuffle service The namespace to use when emitting shuffle service metrics into Hadoop metrics2 system of the NodeManager. + @@ -894,6 +896,7 @@ The following extra configuration options are available when the shuffle service may expect the logger name to look like a class name, it's generally recommended to provide a value which would be a valid Java package or class name and not include spaces. + From 1b7ee9e56b30d99ae948ceb8ca42f025e3d24c96 Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Fri, 5 Jan 2024 22:57:28 +0800 Subject: [PATCH 164/521] [SPARK-46602][SQL] Propagate `allowExisting` in view creation when the view/table does not exists ### What changes were proposed in this pull request? This PR fixes the undesired behavior that concurrent `CREATE VIEW IF NOT EXISTS` queries could throw `TABLE_OR_VIEW_ALREADY_EXISTS` exceptions. It's because the current implementation did not propagate the 'IF NOT EXISTS' when the detecting view/table does not exists. ### Why are the changes needed? Fix the above issue. ### Does this PR introduce _any_ user-facing change? Yes in the sense that if fixes an issue in concurrent case. ### How was this patch tested? Without the fix the following test failed while with this PR if passed. But following the [comment](https://github.com/apache/spark/pull/44603#discussion_r1442515458), I removed the test from this PR. ```scala test("CREATE VIEW IF NOT EXISTS never throws TABLE_OR_VIEW_ALREADY_EXISTS") { // Concurrently create a view with the same name, so that some of the queries may all // get that the view does not exist and try to create it. But with IF NOT EXISTS, the // queries should not fail. import ExecutionContext.Implicits.global val concurrency = 10 val tableName = "table_name" val viewName = "view_name" withTable(tableName) { sql(s"CREATE TABLE $tableName (id int) USING parquet") withView("view_name") { val futures = (0 to concurrency).map { _ => Future { Try { sql(s"CREATE VIEW IF NOT EXISTS $viewName AS SELECT * FROM $tableName") } } } futures.map { future => val res = ThreadUtils.awaitResult(future, 5.seconds) assert( res.isSuccess, s"Failed to create view: ${if (res.isFailure) res.failed.get.getMessage}" ) } } } } ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44603 from anchovYu/create-view-if-not-exist-fix. Authored-by: Xinyi Yu Signed-off-by: Wenchen Fan (cherry picked from commit 9b3c70f6094c97ed61018d9fca8a50320574ab49) Signed-off-by: Wenchen Fan --- .../scala/org/apache/spark/sql/execution/command/views.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 3718794ea5909..b6159f92f9cef 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -167,7 +167,7 @@ case class CreateViewCommand( } } else { // Create the view if it doesn't exist. 
- catalog.createTable(prepareTable(sparkSession, analyzedPlan), ignoreIfExists = false) + catalog.createTable(prepareTable(sparkSession, analyzedPlan), ignoreIfExists = allowExisting) } Seq.empty[Row] } From 9f095b71ca2fab7211f84fbf3a16d2f9ffb3d957 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 5 Jan 2024 11:23:23 -0800 Subject: [PATCH 165/521] [SPARK-46609][SQL] Avoid exponential explosion in PartitioningPreservingUnaryExecNode ### What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/37525 . When expanding the output partitioning/ordering with aliases, we have a threshold to avoid exponential explosion. However, we missed to apply this threshold in one place. This PR fixes it. ### Why are the changes needed? to avoid OOM ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? new test ### Was this patch authored or co-authored using generative AI tooling? no Closes #44614 from cloud-fan/oom. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun (cherry picked from commit f8115da1a2bb33e6344dd69cc38ca7a68c3654b1) Signed-off-by: Dongjoon Hyun --- .../AliasAwareOutputExpression.scala | 4 +-- ...rojectedOrderingAndPartitioningSuite.scala | 30 +++++++++++++++++-- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala index e1dcab80af307..428fe65501fb4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/AliasAwareOutputExpression.scala @@ -30,7 +30,7 @@ trait PartitioningPreservingUnaryExecNode extends UnaryExecNode with AliasAwareOutputExpression { final override def outputPartitioning: Partitioning = { val partitionings: Seq[Partitioning] = if (hasAlias) { - flattenPartitioning(child.outputPartitioning).flatMap { + flattenPartitioning(child.outputPartitioning).iterator.flatMap { case e: Expression => // We need unique partitionings but if the input partitioning is // `HashPartitioning(Seq(id + id))` and we have `id -> a` and `id -> b` aliases then after @@ -44,7 +44,7 @@ trait PartitioningPreservingUnaryExecNode extends UnaryExecNode .take(aliasCandidateLimit) .asInstanceOf[Stream[Partitioning]] case o => Seq(o) - } + }.take(aliasCandidateLimit).toSeq } else { // Filter valid partitiongs (only reference output attributes of the current plan node) val outputSet = AttributeSet(outputExpressions.map(_.toAttribute)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ProjectedOrderingAndPartitioningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ProjectedOrderingAndPartitioningSuite.scala index f5839e9975602..ec13d48d45f84 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ProjectedOrderingAndPartitioningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ProjectedOrderingAndPartitioningSuite.scala @@ -17,11 +17,14 @@ package org.apache.spark.sql.execution -import org.apache.spark.sql.catalyst.expressions.Attribute -import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, PartitioningCollection, UnknownPartitioning} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeReference} +import 
org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, PartitioningCollection, UnknownPartitioning} import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession +import org.apache.spark.sql.types.StringType class ProjectedOrderingAndPartitioningSuite extends SharedSparkSession with AdaptiveSparkPlanHelper { @@ -101,6 +104,22 @@ class ProjectedOrderingAndPartitioningSuite } } + test("SPARK-46609: Avoid exponential explosion in PartitioningPreservingUnaryExecNode") { + withSQLConf(SQLConf.EXPRESSION_PROJECTION_CANDIDATE_LIMIT.key -> "2") { + val output = Seq(AttributeReference("a", StringType)(), AttributeReference("b", StringType)()) + val plan = ProjectExec( + Seq( + Alias(output(0), "a1")(), + Alias(output(0), "a2")(), + Alias(output(1), "b1")(), + Alias(output(1), "b2")() + ), + DummyLeafPlanExec(output) + ) + assert(plan.outputPartitioning.asInstanceOf[PartitioningCollection].partitionings.length == 2) + } + } + test("SPARK-42049: Improve AliasAwareOutputExpression - multi-references to complex " + "expressions") { val df2 = spark.range(2).repartition($"id" + $"id").selectExpr("id + id as a", "id + id as b") @@ -192,3 +211,10 @@ class ProjectedOrderingAndPartitioningSuite assert(outputOrdering.head.sameOrderExpressions.size == 0) } } + +private case class DummyLeafPlanExec(output: Seq[Attribute]) extends LeafExecNode { + override protected def doExecute(): RDD[InternalRow] = null + override def outputPartitioning: Partitioning = { + PartitioningCollection(output.map(attr => HashPartitioning(Seq(attr), 4))) + } +} From fe22ec74db7895c6ea1f39236162ae39027111f4 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 6 Jan 2024 12:38:35 -0800 Subject: [PATCH 166/521] [SPARK-46598][SQL] OrcColumnarBatchReader should respect the memory mode when creating column vectors for the missing column This PR fixes a long-standing bug that `OrcColumnarBatchReader` does not respect the memory mode when creating column vectors for missing columbs. This PR fixes it. To not violate the memory mode requirement No new test no Closes #44598 from cloud-fan/orc. 
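An illustrative way to exercise the affected path end to end (paths and session settings are assumptions, not part of the patch): with off-heap column vectors enabled, a read schema containing a column missing from the ORC files makes the reader allocate a missing-column vector, which now honours the off-heap mode.

```
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("orc-missing-column-offheap")
  .master("local[*]")
  .config("spark.memory.offHeap.enabled", "true")
  .config("spark.memory.offHeap.size", "512m")
  .config("spark.sql.columnVector.offheap.enabled", "true") // selects MemoryMode.OFF_HEAP
  .getOrCreate()

spark.range(5).toDF("id").write.mode("overwrite").orc("/tmp/orc_missing_col")
// "extra" is not present in the files, so its vector comes from the missing-column branch
// and is filled with nulls.
spark.read.schema("id LONG, extra STRING").orc("/tmp/orc_missing_col").show()
```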
Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun (cherry picked from commit 0c1c5e93e376b97a6d2dae99e973b9385155727a) Signed-off-by: Dongjoon Hyun --- .../orc/OrcColumnarBatchReader.java | 17 ++++++++++++----- .../datasources/orc/OrcFileFormat.scala | 9 ++++++++- .../v2/orc/OrcPartitionReaderFactory.scala | 6 ++++-- .../execution/datasources/v2/orc/OrcScan.scala | 8 +++++++- .../orc/OrcColumnarBatchReaderSuite.scala | 18 +++++++++++++++--- 5 files changed, 46 insertions(+), 12 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java index b6184baa2e0ed..5bfe22450f36b 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReader.java @@ -31,12 +31,11 @@ import org.apache.orc.TypeDescription; import org.apache.orc.mapred.OrcInputFormat; +import org.apache.spark.memory.MemoryMode; import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.util.ResolveDefaultColumns; import org.apache.spark.sql.execution.datasources.orc.OrcShimUtils.VectorizedRowBatchWrap; -import org.apache.spark.sql.execution.vectorized.ColumnVectorUtils; -import org.apache.spark.sql.execution.vectorized.ConstantColumnVector; -import org.apache.spark.sql.execution.vectorized.OnHeapColumnVector; +import org.apache.spark.sql.execution.vectorized.*; import org.apache.spark.sql.types.*; import org.apache.spark.sql.vectorized.ColumnarBatch; @@ -73,11 +72,14 @@ public class OrcColumnarBatchReader extends RecordReader { @VisibleForTesting public ColumnarBatch columnarBatch; + private final MemoryMode memoryMode; + // The wrapped ORC column vectors. private org.apache.spark.sql.vectorized.ColumnVector[] orcVectorWrappers; - public OrcColumnarBatchReader(int capacity) { + public OrcColumnarBatchReader(int capacity, MemoryMode memoryMode) { this.capacity = capacity; + this.memoryMode = memoryMode; } @@ -177,7 +179,12 @@ public void initBatch( int colId = requestedDataColIds[i]; // Initialize the missing columns once. if (colId == -1) { - OnHeapColumnVector missingCol = new OnHeapColumnVector(capacity, dt); + final WritableColumnVector missingCol; + if (memoryMode == MemoryMode.OFF_HEAP) { + missingCol = new OffHeapColumnVector(capacity, dt); + } else { + missingCol = new OnHeapColumnVector(capacity, dt); + } // Check if the missing column has an associated default value in the schema metadata. // If so, fill the corresponding column vector with the value. 
Object defaultValue = ResolveDefaultColumns.existenceDefaultValues(requiredSchema)[i]; diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala index b7e6f11f67d69..53d2b08431f85 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala @@ -31,6 +31,7 @@ import org.apache.orc.mapred.OrcStruct import org.apache.orc.mapreduce._ import org.apache.spark.TaskContext +import org.apache.spark.memory.MemoryMode import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ @@ -152,6 +153,12 @@ class OrcFileFormat assert(supportBatch(sparkSession, resultSchema)) } + val memoryMode = if (sqlConf.offHeapColumnVectorEnabled) { + MemoryMode.OFF_HEAP + } else { + MemoryMode.ON_HEAP + } + OrcConf.IS_SCHEMA_EVOLUTION_CASE_SENSITIVE.setBoolean(hadoopConf, sqlConf.caseSensitiveAnalysis) val broadcastedConf = @@ -196,7 +203,7 @@ class OrcFileFormat val taskAttemptContext = new TaskAttemptContextImpl(taskConf, attemptId) if (enableVectorizedReader) { - val batchReader = new OrcColumnarBatchReader(capacity) + val batchReader = new OrcColumnarBatchReader(capacity, memoryMode) // SPARK-23399 Register a task completion listener first to call `close()` in all cases. // There is a possibility that `initialize` and `initBatch` hit some errors (like OOM) // after opening a file. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala index 2b7bdae6b31b4..b23071e50cbed 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcPartitionReaderFactory.scala @@ -26,6 +26,7 @@ import org.apache.orc.mapred.OrcStruct import org.apache.orc.mapreduce.OrcInputFormat import org.apache.spark.broadcast.Broadcast +import org.apache.spark.memory.MemoryMode import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.connector.expressions.aggregate.Aggregation import org.apache.spark.sql.connector.read.{InputPartition, PartitionReader} @@ -57,7 +58,8 @@ case class OrcPartitionReaderFactory( partitionSchema: StructType, filters: Array[Filter], aggregation: Option[Aggregation], - options: OrcOptions) extends FilePartitionReaderFactory { + options: OrcOptions, + memoryMode: MemoryMode) extends FilePartitionReaderFactory { private val resultSchema = StructType(readDataSchema.fields ++ partitionSchema.fields) private val isCaseSensitive = sqlConf.caseSensitiveAnalysis private val capacity = sqlConf.orcVectorizedReaderBatchSize @@ -146,7 +148,7 @@ case class OrcPartitionReaderFactory( val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) val taskAttemptContext = new TaskAttemptContextImpl(taskConf, attemptId) - val batchReader = new OrcColumnarBatchReader(capacity) + val batchReader = new OrcColumnarBatchReader(capacity, memoryMode) batchReader.initialize(fileSplit, taskAttemptContext) val requestedPartitionColIds = Array.fill(readDataSchema.length)(-1) ++ Range(0, partitionSchema.length) diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala index 072ab26774e52..ca37d22eeb1e4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/orc/OrcScan.scala @@ -21,6 +21,7 @@ import scala.collection.JavaConverters.mapAsScalaMapConverter import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +import org.apache.spark.memory.MemoryMode import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.connector.expressions.aggregate.Aggregation @@ -64,11 +65,16 @@ case class OrcScan( override def createReaderFactory(): PartitionReaderFactory = { val broadcastedConf = sparkSession.sparkContext.broadcast( new SerializableConfiguration(hadoopConf)) + val memoryMode = if (sparkSession.sessionState.conf.offHeapColumnVectorEnabled) { + MemoryMode.OFF_HEAP + } else { + MemoryMode.ON_HEAP + } // The partition values are already truncated in `FileScan.partitions`. // We should use `readPartitionSchema` as the partition schema here. OrcPartitionReaderFactory(sparkSession.sessionState.conf, broadcastedConf, dataSchema, readDataSchema, readPartitionSchema, pushedFilters, pushedAggregate, - new OrcOptions(options.asScala.toMap, sparkSession.sessionState.conf)) + new OrcOptions(options.asScala.toMap, sparkSession.sessionState.conf), memoryMode) } override def equals(obj: Any): Boolean = obj match { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala index a9389c1c21b40..06ea12f83ce75 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcColumnarBatchReaderSuite.scala @@ -26,11 +26,12 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.orc.TypeDescription import org.apache.spark.TestUtils +import org.apache.spark.memory.MemoryMode import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.execution.vectorized.ConstantColumnVector +import org.apache.spark.sql.execution.vectorized.{ConstantColumnVector, OffHeapColumnVector} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -53,7 +54,7 @@ class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession { requestedDataColIds: Array[Int], requestedPartitionColIds: Array[Int], resultFields: Array[StructField]): OrcColumnarBatchReader = { - val reader = new OrcColumnarBatchReader(4096) + val reader = new OrcColumnarBatchReader(4096, MemoryMode.ON_HEAP) reader.initBatch( orcFileSchema, resultFields, @@ -117,7 +118,7 @@ class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession { val fileSplit = new FileSplit(new Path(file.getCanonicalPath), 0L, file.length, Array.empty) val taskConf = sqlContext.sessionState.newHadoopConf() val orcFileSchema = TypeDescription.fromString(schema.simpleString) - val 
vectorizedReader = new OrcColumnarBatchReader(4096) + val vectorizedReader = new OrcColumnarBatchReader(4096, MemoryMode.ON_HEAP) val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) val taskAttemptContext = new TaskAttemptContextImpl(taskConf, attemptId) @@ -148,4 +149,15 @@ class OrcColumnarBatchReaderSuite extends QueryTest with SharedSparkSession { } } } + + test("SPARK-46598: off-heap mode") { + val reader = new OrcColumnarBatchReader(4096, MemoryMode.OFF_HEAP) + reader.initBatch( + TypeDescription.fromString("struct"), + StructType.fromDDL("col1 int, col2 int, col3 int").fields, + Array(0, 1, -1), + Array(-1, -1, -1), + InternalRow.empty) + assert(reader.columnarBatch.column(2).isInstanceOf[OffHeapColumnVector]) + } } From 75b567daa705016f2ddb74dc8404ae66bc33bbcd Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 8 Jan 2024 16:24:06 -0800 Subject: [PATCH 167/521] [SPARK-46628][INFRA] Use SPDX short identifier in `license` name ### What changes were proposed in this pull request? This PR aims to use SPDX short identifier as `license`'s `name` field. - https://spdx.org/licenses/Apache-2.0.html ### Why are the changes needed? SPDX short identifier is recommended as `name` field by `Apache Maven`. - https://maven.apache.org/pom.html#Licenses ASF pom file has been using it. This PR aims to match with ASF pom file. - https://github.com/apache/maven-apache-parent/pull/118 - https://github.com/apache/maven-apache-parent/blob/7888bdb8ee653ecc03b5fee136540a607193c240/pom.xml#L46 ``` Apache-2.0 ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual review. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44631 from dongjoon-hyun/SPARK-46628. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit d008f81a9d8d4b5e8e434469755405f6ae747e75) Signed-off-by: Dongjoon Hyun --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 14e0ab3e0f620..9e945f8d959a4 100644 --- a/pom.xml +++ b/pom.xml @@ -32,7 +32,7 @@ https://spark.apache.org/ - Apache 2.0 License + Apache-2.0 http://www.apache.org/licenses/LICENSE-2.0.html repo From 2b0c3e12636b93758879ee3ff23626ea1f218264 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Tue, 9 Jan 2024 09:07:34 +0800 Subject: [PATCH 168/521] [SPARK-46610][SQL] Create table should throw exception when no value for a key in options ### What changes were proposed in this pull request? Before SPARK-43529, there was a check from `visitPropertyKeyValues` that throws for null values for option keys. After SPARK-43529, a new function is used to support expressions in options but the new function lose the check. This PR adds the check back. ### Why are the changes needed? Throw exception when a option value is null. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT ### Was this patch authored or co-authored using generative AI tooling? NO Closes #44615 from amaliujia/fix_create_table_options. 
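For illustration only (the actual parser change follows in the diff): the guard being restored is that every key in an OPTIONS list must carry a value, and a key without one fails at parse time instead of being stored as a null. `OptionEntry` and `parseOptions` are names invented for this sketch; the error text mirrors the message asserted in the test below.

```scala
// Sketch of the restored check: reject an OPTIONS key that has no value instead of
// silently mapping it to null, as in CREATE TABLE t USING my_data_source OPTIONS (password).
case class OptionEntry(key: String, value: Option[String])

def parseOptions(entries: Seq[OptionEntry]): Map[String, String] = {
  entries.map { entry =>
    val value = entry.value.getOrElse {
      throw new IllegalArgumentException(
        s"A value must be specified for the key: ${entry.key}.")
    }
    entry.key -> value
  }.toMap
}

// parseOptions(Seq(OptionEntry("password", None)))
// => IllegalArgumentException: A value must be specified for the key: password.
```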
Lead-authored-by: Rui Wang Co-authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit e7536f2484afce412256bf711452acde8df5a287) Signed-off-by: Wenchen Fan --- .../spark/sql/catalyst/parser/AstBuilder.scala | 4 +++- .../spark/sql/catalyst/parser/DDLParserSuite.scala | 12 ++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index b80ea8fddcfe2..90fbdd94dc386 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -3274,7 +3274,9 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { ctx: ExpressionPropertyListContext): OptionList = { val options = ctx.expressionProperty.asScala.map { property => val key: String = visitPropertyKey(property.key) - val value: Expression = Option(property.value).map(expression).orNull + val value: Expression = Option(property.value).map(expression).getOrElse { + operationNotAllowed(s"A value must be specified for the key: $key.", ctx) + } key -> value }.toSeq OptionList(options) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala index 31fd232181a4f..6f36a8c9719cb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala @@ -2356,6 +2356,18 @@ class DDLParserSuite extends AnalysisTest { stop = 42)) } + test("SPARK-46610: throw exception when no value for a key in create table options") { + val createTableSql = "create table test_table using my_data_source options (password)" + checkError( + exception = parseException(createTableSql), + errorClass = "_LEGACY_ERROR_TEMP_0035", + parameters = Map("message" -> "A value must be specified for the key: password."), + context = ExpectedContext( + fragment = createTableSql, + start = 0, + stop = 62)) + } + test("UNCACHE TABLE") { comparePlans( parsePlan("UNCACHE TABLE a.b.c"), From a753239ab1afaeddf4c991d42b93e4845f12e576 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Mon, 8 Jan 2024 22:22:06 -0400 Subject: [PATCH 169/521] [SPARK-46600][SQL] Move shared code between SqlConf and SqlApiConf to SqlApiConfHelper ### What changes were proposed in this pull request? This code proposes to introduce a new object named `SqlApiConfHelper` to contain shared code between `SqlApiConf` and `SqlConf`. ### Why are the changes needed? As of now, SqlConf will access some of the variables of SqlApiConf while SqlApiConf also try to initialize SqlConf upon initialization. This PR is to avoid potential circular dependency between SqlConf and SqlApiConf. The shared variables or access to the shared variables are moved to the new `SqlApiConfHelper`. So either SqlApiConf and SqlConf wants to initialize the other side, they will only initialize the same third object. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? Existing UT ### Was this patch authored or co-authored using generative AI tooling? NO Closes #44602 from amaliujia/refactor_sql_api. 
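For illustration only (not code from this patch): the shape of the refactor is a small, dependency-free holder that both config entry points read through, so neither has to trigger the other's initialization. The names `ConfLike`, `DefaultConfLike`, `ConfAccess` and `RichConf` are invented for this sketch; only the structure matches the real `SqlApiConfHelper` added below.

```scala
import java.util.concurrent.atomic.AtomicReference

// The holder stores a *getter* for the active conf, pre-seeded with a safe default.
// The richer implementation registers itself once it has finished loading, so neither
// config object ever constructs the other during class initialization.
trait ConfLike { def caseSensitive: Boolean }
object DefaultConfLike extends ConfLike { val caseSensitive: Boolean = false }

object ConfAccess {
  private val getter = new AtomicReference[() => ConfLike](() => DefaultConfLike)
  def setGetter(g: () => ConfLike): Unit = getter.set(g)
  def get: ConfLike = getter.get()()
}

object RichConf extends ConfLike {
  val caseSensitive: Boolean = true
  // Runs when RichConf is first loaded, mirroring SQLConf's call to
  // SqlApiConfHelper.setConfGetter(() => SQLConf.get) in the diff below.
  ConfAccess.setGetter(() => RichConf)
}
```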
Authored-by: Rui Wang Signed-off-by: Herman van Hovell (cherry picked from commit 03fc5e26b866491b52f89f4d24beade7d1669a37) Signed-off-by: Herman van Hovell --- .../spark/sql/internal/SqlApiConf.scala | 26 +++------- .../spark/sql/internal/SqlApiConfHelper.scala | 48 +++++++++++++++++++ .../apache/spark/sql/internal/SQLConf.scala | 12 ++--- 3 files changed, 61 insertions(+), 25 deletions(-) create mode 100644 sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala index d746e9037ec48..5ec72b83837ee 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.internal import java.util.TimeZone -import java.util.concurrent.atomic.AtomicReference import scala.util.Try @@ -48,25 +47,14 @@ private[sql] trait SqlApiConf { private[sql] object SqlApiConf { // Shared keys. - val ANSI_ENABLED_KEY: String = "spark.sql.ansi.enabled" - val LEGACY_TIME_PARSER_POLICY_KEY: String = "spark.sql.legacy.timeParserPolicy" - val CASE_SENSITIVE_KEY: String = "spark.sql.caseSensitive" - val SESSION_LOCAL_TIMEZONE_KEY: String = "spark.sql.session.timeZone" - val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = "spark.sql.session.localRelationCacheThreshold" + val ANSI_ENABLED_KEY: String = SqlApiConfHelper.ANSI_ENABLED_KEY + val LEGACY_TIME_PARSER_POLICY_KEY: String = SqlApiConfHelper.LEGACY_TIME_PARSER_POLICY_KEY + val CASE_SENSITIVE_KEY: String = SqlApiConfHelper.CASE_SENSITIVE_KEY + val SESSION_LOCAL_TIMEZONE_KEY: String = SqlApiConfHelper.SESSION_LOCAL_TIMEZONE_KEY + val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = + SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY - /** - * Defines a getter that returns the [[SqlApiConf]] within scope. - */ - private val confGetter = new AtomicReference[() => SqlApiConf](() => DefaultSqlApiConf) - - /** - * Sets the active config getter. - */ - private[sql] def setConfGetter(getter: () => SqlApiConf): Unit = { - confGetter.set(getter) - } - - def get: SqlApiConf = confGetter.get()() + def get: SqlApiConf = SqlApiConfHelper.getConfGetter.get()() // Force load SQLConf. This will trigger the installation of a confGetter that points to SQLConf. Try(SparkClassUtils.classForName("org.apache.spark.sql.internal.SQLConf$")) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala new file mode 100644 index 0000000000000..79b6cb9231c51 --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.internal + +import java.util.concurrent.atomic.AtomicReference + +/** + * SqlApiConfHelper is created to avoid a deadlock during a concurrent access to SQLConf and + * SqlApiConf, which is because SQLConf and SqlApiConf tries to load each other upon + * initializations. SqlApiConfHelper is private to sql package and is not supposed to be + * accessed by end users. Variables and methods within SqlApiConfHelper are defined to + * be used by SQLConf and SqlApiConf only. + */ +private[sql] object SqlApiConfHelper { + // Shared keys. + val ANSI_ENABLED_KEY: String = "spark.sql.ansi.enabled" + val LEGACY_TIME_PARSER_POLICY_KEY: String = "spark.sql.legacy.timeParserPolicy" + val CASE_SENSITIVE_KEY: String = "spark.sql.caseSensitive" + val SESSION_LOCAL_TIMEZONE_KEY: String = "spark.sql.session.timeZone" + val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = "spark.sql.session.localRelationCacheThreshold" + + val confGetter: AtomicReference[() => SqlApiConf] = { + new AtomicReference[() => SqlApiConf](() => DefaultSqlApiConf) + } + + def getConfGetter: AtomicReference[() => SqlApiConf] = confGetter + + /** + * Sets the active config getter. + */ + def setConfGetter(getter: () => SqlApiConf): Unit = { + confGetter.set(getter) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 70bd21ac1709d..de4a89667aff6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -181,7 +181,7 @@ object SQLConf { // Make sure SqlApiConf is always in sync with SQLConf. SqlApiConf will always try to // load SqlConf to make sure both classes are in sync from the get go. - SqlApiConf.setConfGetter(() => SQLConf.get) + SqlApiConfHelper.setConfGetter(() => SQLConf.get) /** * Returns the active config object within the current scope. If there is an active SparkSession, @@ -894,7 +894,7 @@ object SQLConf { .booleanConf .createWithDefault(false) - val CASE_SENSITIVE = buildConf(SqlApiConf.CASE_SENSITIVE_KEY) + val CASE_SENSITIVE = buildConf(SqlApiConfHelper.CASE_SENSITIVE_KEY) .internal() .doc("Whether the query analyzer should be case sensitive or not. " + "Default to case insensitive. It is highly discouraged to turn on case sensitive mode.") @@ -2676,7 +2676,7 @@ object SQLConf { Try { DateTimeUtils.getZoneId(zone) }.isSuccess } - val SESSION_LOCAL_TIMEZONE = buildConf(SqlApiConf.SESSION_LOCAL_TIMEZONE_KEY) + val SESSION_LOCAL_TIMEZONE = buildConf(SqlApiConfHelper.SESSION_LOCAL_TIMEZONE_KEY) .doc("The ID of session local timezone in the format of either region-based zone IDs or " + "zone offsets. Region IDs must have the form 'area/city', such as 'America/Los_Angeles'. " + "Zone offsets must be in the format '(+|-)HH', '(+|-)HH:mm' or '(+|-)HH:mm:ss', e.g '-08', " + @@ -3180,7 +3180,7 @@ object SQLConf { .checkValues(StoreAssignmentPolicy.values.map(_.toString)) .createWithDefault(StoreAssignmentPolicy.ANSI.toString) - val ANSI_ENABLED = buildConf(SqlApiConf.ANSI_ENABLED_KEY) + val ANSI_ENABLED = buildConf(SqlApiConfHelper.ANSI_ENABLED_KEY) .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. 
" + "For example, Spark will throw an exception at runtime instead of returning null results " + "when the inputs to a SQL operator/function are invalid." + @@ -3779,7 +3779,7 @@ object SQLConf { .checkValues(LegacyBehaviorPolicy.values.map(_.toString)) .createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) - val LEGACY_TIME_PARSER_POLICY = buildConf(SqlApiConf.LEGACY_TIME_PARSER_POLICY_KEY) + val LEGACY_TIME_PARSER_POLICY = buildConf(SqlApiConfHelper.LEGACY_TIME_PARSER_POLICY_KEY) .internal() .doc("When LEGACY, java.text.SimpleDateFormat is used for formatting and parsing " + "dates/timestamps in a locale-sensitive manner, which is the approach before Spark 3.0. " + @@ -4344,7 +4344,7 @@ object SQLConf { .createWithDefault(false) val LOCAL_RELATION_CACHE_THRESHOLD = - buildConf(SqlApiConf.LOCAL_RELATION_CACHE_THRESHOLD_KEY) + buildConf(SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY) .doc("The threshold for the size in bytes of local relations to be cached at " + "the driver side after serialization.") .version("3.5.0") From d3e3084808453769ba0cd4278ee8650e40c185ea Mon Sep 17 00:00:00 2001 From: Gengliang Wang Date: Wed, 10 Jan 2024 09:32:30 +0900 Subject: [PATCH 170/521] [SPARK-46637][DOCS] Enhancing the Visual Appeal of Spark doc website ### What changes were proposed in this pull request? Enhance the Visual Appeal of Spark doc website after https://github.com/apache/spark/pull/40269: #### 1. There is a weird indent on the top right side of the first paragraph of the Spark 3.5.0 doc overview page Before this PR image After this PR: image #### 2. All the titles are too big and therefore less readable. In the website https://spark.apache.org/downloads.html, titles are h2 while in doc site https://spark.apache.org/docs/latest/ titles are h1. So we should make the font size of titles smaller. Before this PR: image After this PR: image #### 3. The banner image can't be displayed correct. Even when it shows up, it will be hover by the text. To make it simple, let's not show the banner image as we did in https://spark.apache.org/docs/3.4.2/ image image ### Why are the changes needed? Improve the Visual Appeal of Spark doc website ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manually build doc and verify on local setup. ### Was this patch authored or co-authored using generative AI tooling? No Closes #44642 from gengliangwang/enhance_doc. Authored-by: Gengliang Wang Signed-off-by: Hyukjin Kwon --- docs/_layouts/global.html | 26 +++++++++------------ docs/css/custom.css | 35 ++++++++++++----------------- docs/img/spark-hero-thin-light.jpg | Bin 278664 -> 0 bytes 3 files changed, 25 insertions(+), 36 deletions(-) delete mode 100644 docs/img/spark-hero-thin-light.jpg diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index 8c4435fdf31d9..5116472eaa769 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -138,25 +138,21 @@ {% if page.url == "/" %}
-
-

Apache Spark - A Unified engine for large-scale data analytics

-
-
- Apache Spark is a unified analytics engine for large-scale data processing.
- It provides high-level APIs in Java, Scala, Python and R,
- and an optimized engine that supports general execution graphs.
- It also supports a rich set of higher-level tools including
- Spark SQL for SQL and structured data processing,
- pandas API on Spark for pandas workloads,
- MLlib for machine learning,
- GraphX for graph processing,
- and Structured Streaming
- for incremental computation and stream processing.
-
+
+ Apache Spark is a unified analytics engine for large-scale data processing.
+ It provides high-level APIs in Java, Scala, Python and R,
+ and an optimized engine that supports general execution graphs.
+ It also supports a rich set of higher-level tools including
+ Spark SQL for SQL and structured data processing,
+ pandas API on Spark for pandas workloads,
+ MLlib for machine learning,
+ GraphX for graph processing,
+ and Structured Streaming
+ for incremental computation and stream processing.
diff --git a/docs/css/custom.css b/docs/css/custom.css index 1239c0ed440ef..8158938866c48 100644 --- a/docs/css/custom.css +++ b/docs/css/custom.css @@ -95,18 +95,7 @@ section { border-color: transparent; } -.hero-banner .bg { - background: url(/img/spark-hero-thin-light.jpg) no-repeat; - transform: translate(36%, 0%); - height: 475px; - top: 0; - position: absolute; - right: 0; - width: 100%; - opacity: 50%; -} - -.hero-banner h1 { +.hero-banner .container .row h1 { color: #0B9ACE; font-style: normal; font-weight: normal; @@ -115,13 +104,6 @@ section { letter-spacing: -0.045em; } -.hero-banner h2 { - font-style: normal; - font-weight: bold; - font-size: 32px; - line-height: 42px; -} - .what-is-spark { font-style: normal; font-weight: normal; @@ -822,18 +804,29 @@ ul { margin-bottom: 10px; } -.global h2, .global .h2 { +.global h1, .global .h1 { font-size: 30px; } -.global h3 { +#content h1.title { + font-size: 40px; +} + +.global h2 { font-size: 24px !important; } +.global h3 { + font-size: 20px !important; +} + .global h4 { font-size: 18px !important; } +.global h5 { + font-size: 16px !important; +} .global h1:first-letter, .global h2:first-letter, .global h3:first-letter, .global h4:first-letter, .global h5:first-letter, .global h6:first-letter, .global .h1:first-letter, .global .h2:first-letter, .global .h3:first-letter, .global .h4:first-letter, .global .h5:first-letter, .global .h6:first-letter { text-transform: uppercase; diff --git a/docs/img/spark-hero-thin-light.jpg b/docs/img/spark-hero-thin-light.jpg deleted file mode 100644 index 4d9ed926b361fc5b9182db20bc04b3ac766ac38c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 278664 zcmdSAcTiKo+cq3T1-qySQbeVLROtxV00Ytj1W70=Achcnh;+Mx6s0Ovfq)P~1X3g+ zprRlkAPE6UNFqYO(4v&k;p6wbZRY#yoA3SW*)y|y&z#-+>~&^m_b%6N{oMKmIBaJF zv;l~S001wxAHdcKAl3RV>^1=4-~iA70082ET_Q&SqT7ok^SE? 
zNJQ;__SpddJcR*v{?9&;?e;%qTl0TL|Ib$RjmZDe_-4od>b*1Ujp+Z9MJE3Gsm zDY|Rde?B9Uu>BseQ)1VV(|Q+oA9cAYb|(Co{{5G4#T75reUb$A{Z%@5E8@YPy~m}b zPsk{%sH&-J7#JEEo0ytizG7`-dlhKs>gEpixb6w@^AETkcn1~~8HJ2S-HnNT_~`MI zr_Yj8GPAOCa`Rrj&Mz%1M^{w7tE#SVz!Qk14~^u{tzW*jwSVjAq|y5s1A{+@m?OVO z$9UrtfB2LC=H?d`g-gpTtN+2Z{XhQ)tnK=LAp3ve+WrY5J9h3A-6{4TTp~MA{{b$s zbJuCT-A69Eh}{f7dPe`g__0eb-`0KFqj(PVSMpXw-`?X&1|!P<{sZm5ko|uH_Tc{) zvi}Y2|KMT)E^jZ)|GToat>phs!}jPc_V%(&-(mvxi;8S7CQ%816<~8y4e(~iy}f|{ zX3ff|@Z|>DQem_Eq*&h9L|w8}EJ08JAF&c2vR4ptQ;;{x0?^r7-=p8>dKwr?-A~asRisphU;2)iK+>$jjPk1qlqr0zbH>j^Aw@Kt1jF zI}74fB^L&07|8WYoei$^Y2svu_16|bYL}ub9qek#_9PjU9BbByx85jQPm26^^tWT# zIQytF?&-ol*E4^7j?nD0cITBr^)%*hJZb7z+5!NnB~kOiyG}Vx7E6wt)doIk{R&D| z$uqdI5;%=kj_I$mbf2kn1-om!dq=nbNwX_V*aGl2bw_1Zw&$mTR5o-$L#WikI=drH z>aLG@)4EZ`_&@4)u316?TD>D;h-Pr*N~-whwxgmUmt*dYHDu~X-iwG)ym#uu^6v{Shf~)5Kf7}%0ej3MKxUY#gfl0 z4=*2HE@^X?GwO(%Ipftv4`zN_NA0*$qTuOS@FKR>TW%Am%pN?jbM7;!XW-)gNmDkglz?x5rulK2sm z(w!sndhc)OejHU){ws1|&QPfgm7o*j)?A-MdY&l!$1m+u!bhy$`8;E7Tkir?mY#uW z+6KosRDNDe!x_#LYb5dCjiLmIDGo*AGg@ji!JHn9~7ei%^X9BOQc&vSwgbSK z<2~6RPv>6U0w|pwel^0Tj05gBX}tX0{#Y7k*Q{FnBPFb&_N_E77UaeXxNWnlmFAlM_*G z-)XCTuH83BGiQBtdPc*K~ReeD8r>o zvJqvn?+^a|h!cH5)r}1_Cr^a!6mWByQ`A~qQmb1_o^>48-cT)~q-c>bRSks&+#c-m zUeXT<73hON%T4cJCqLP-LV;rtE}D7ns4z*nX<)wdPEOdC;O}kh_hWj z#YNaVe@~wN=U$PwFYl#W=B0=84W?_)F!Op5U#8u`w?6IyeSH}1{8qg=uzq0535nCO zw*VK_q{_{xidQ*0cn|TDEbvr9D24WYLE%Z#4uX?R7yNfd^5J3F>TbVAdXvg@AA7QD z=`DHCj*LH^Y8gOx6BH$l@=C=zLyLMo@@Jm5z&91TcmDvg1bqy)d+tE%0GlvH1;aIpwWD8hqtoj<{0JAnt9oJ-sH zJGU9tgkQNY=`MM}P2_^yy+&NGs;OgH;qZjI(HKv&qkDVT>hcMUYo~S!1v3`N&4a{z zHVdYD(i_|}5G!C~)@r#&J%%*lKeBawYrLkD$~+GO-7zn>1vvKl z^v%?yV_A|HOhx95hQk;dUm4MrHbxUwE+4)c(<~<=a)_zNN|3U3PDdRJob8!|usKjg ze}&K^y8cmYqa8D7g{z^Tx+L{=lg1A_qtq1`s`nJw(0aqDvPvPJHwf4$KK=g1ZUs$J z$aRHay5xNrloAW+B zjbHgy_V2iRy8E3F#d|kHYj3R?WduiD^|o@_Y1seJ>TITHuI!VmB4GtCab~8 z7xUI6ZZ*k!3vlodw9Q=WY|StkHis{743vSoS1ANOs@MYXzbc@Z9h!3;0xb4+_q(i+ zu8wlG7*Xebdcv2-I|)`(q0cIZ6Uw23mmf*?`{EpSe{s`(hBvJKz(1N_!{tT zXI#n--oT?fdBihh$8sFJ{m|vRIB4ZO9{HAi@^%LDP5kbDL(%ccqa~}Thhm*#Cf3eB zrKd8ba>3E5g+YeZZ!fdU_RaJf|1%U8G1%;dB}x^?swpK3-|*#@f<+95xiBI;BZw+( z$_HwD$K6f6@qGsq^UGrM4F}QnZ$J%b=@X}}m?~enqcO<7fZc)*r@rg+dXce=u5C`r zoj{l+_AccLgL(@jo-JV8`Hk<_a$lTq0{82rO97B6`XZ+!=4O71 z-TIj2We5SS7(C@A1bRA6_U_Nfo%;^Y&MjI#63J6deenfww_T-C!>~Gg zg=}qGL7pm|Ba&u`W_)XbW8Rs#&RFB|W_61=DiWePJIp#^k_mp@xWNp(eL>;l>yDjK zei^k9Lq5I%YC7XE<0T@Nz!8#tgsFU&^!dUH7U>Kmo)+pnpv9w#_Dz-RAIa%edxb^X5S^Tj;R8*~q)CWX9vOL9xU&#GQ8-_%e$+igbCEHg`1L z(b8pM)={s=F7ST4%KI#jzNB9es+CVZhs~LAgAr@T87!LiU`scLjptfpv~B9t$3&Mi zfxRa>c(?S~ zsIm&m5ZU~GaIfUaDtDcC z&G^fzK&#POT~XJA&-!W^6i(WqVUFcXQEA7{sD6`ldnzgoc(?P2NRMOKv2JrE6P zn+0}saP7H{Kb!*40@dgO`gROPM%L;`Ed2#Y{O;T=?Ptb8~dum5vGO!3`{+f5rr z+wXP-T|nW<6X{YRPKi^DDGXy{9XrFi9WoP8o6-S`Y9TjkQbPkcgVBzrW&>2eH%wW| z&=I{%#e4er2H@bW3Xx2Zu45AN$Vg%71I@~Pk1(n4s%U9jfc)sC?U{2aon+2nsKd$3 zi7&tZ+*I#G=0hXCdUk?ZtT9G@+&YKaVs*uY8a`Z1wW9={HFnwz`#n16Ucg zXLm2&+b`m^+sz0t;Efcn@ngWxU~)>4b!?^p3uHmns5c!r>txd4-FNF?bNw%VQzmU* z_|ai-;3S;;hA@Qk(XA`Le+xI1W=sZ(b%X0$`7tB2JCbl9Y>ig9;9Pg z3D_z#jQuB2O5;bLf*2E@&N4wjbXNakkb|nS{PCs^Of#-5)|(U;yhJ2k#I$7gC0dq5 z;Rh2s+8EAnw2vXq+VF;xo=s$inaC!B_E_wJIj|Qg1GC+U`t|cmXzg4(S_K`1&s@U7 zz0DDUNhyLWSw6W}K-YHvstj6+yw^Rr_v#5zrGSVJqJY8%wYSkY44QTA1o0GGd z0-PJ8X|U-Q3)~Q-PTkZpZpID`S;$hcvs2w^sMvQWUe(6#|8*lNF4M+T;S2D5_~TfH zjUdNJ$OtN!%tQH8_zY&B?*T&Ys^*Zn)_M&MqFhni!Sq4Y+|B$A{ScLKk~!hdSlnaT z9Z>nRvM3T2)*kZ3op~k5N#KvDYd^_X&NSF*9D#N$GWGc4_E^JtX~MKC=aMF-$=3l_ z9bI1l)_tPu9&VL_G)LOs%mQ%E5Z3$TkRO&RFTWAUO-uN2e27F(@Z;(j-)*3ovW z(9WOlp09Gd(UnkD^)0Ru@MO0+qU@WF@1df9XX`X7q(pk{#i>IBD0fXP7FnZi7}7N7 
zIEN1@TZ*wd1ZuT0tc$MlEyy=$R=aFpi}bH{uqX=@w`jzDPq?J_QSnrlsRdgpgVMk`uVmhDUMgQSocI!YfYwliRaRH zU$D|op$Y|w*aW5_!#8AU3F5&vjnT@XXlimPnS)e%cFIKsBclmG;wuxT~Nha9PqY&>^2WCVwmw?JhmPc8L)*AXN1Dk3S_qyzXVFzbS zCnzvNOB9kDSQQNMVqrK!m1I0`>NmQ5)qQIbS+?4r(0xV$TG4AmBl z*a`pRyQ(6x8oPbq9v)}wuBg6$;J5Z6Q^y$EB1T15*#0aHU0H1me)nl^w4-+Qx`rD) zn{77tV5wq!^kJDY4eLGSkkm^?Iq;+M3_fW5o^k$5oP26hlQ7N=xGfNOS5 zSQYa1CrrbQnVRs~_HK*E*iUC=O|@`Sh%V#r*Iry`|DX`3G1)azrHV*kF2c&Op~xFy zDU+J^HW3+f9N|^|^P0~1gQH3L99EgxMqcZ{ z^({cL-i?kV-A$oFT-41x!&XpyW_&f;EXOz{sKS!u8;dZ!bO1|^2D_NQqfew{Q*-Rx z&;cq%=XYr)DVa86dbIv5da-@pG4GRR8ZVu24#xyt^m}0FySf|p=7#8tV>7+x&A0b2 zel^Gvg1o9)mV%B%RB3LT`8JBtgYI`{{HJ?X9Xc7OtL?auFPKYGq&1@a8kr9>kMZT) zrq=i?5Cuc`>?dO@l(HYTX=x=}r$$?!226?uj`OON*EM}s+Ydcmy3sNzt<$rsY{EhKTb1C z%~3?jdn%W+SI-(AoLb@eZvnni`dU!7Cs)sA z;~ce{7N|kE+-$8Djltfl?G7x2QBc(pT~8nTp}QMj05#&a)wpj`N(w~<@)WHIdv+Qv zO^u4SO80=Usu9s1NwD@Alr1N|J`1ac8)?64vjir!5=xrJHjtoxYb3zSF85{ei_f;N zAKsHfIVZflJABaQs*lZYmNTe&@T(~|cXcw4T)Y8oahr)jeg)cc3t?X??_5R&G{AkI ztg1~l7s!D7UVf!E56=aD80mEk1Ny!ECUS3-`MyeX3WVAmJDXc{7JFw6t;qHh2V3Hn#t-+Z!qA__jr^VaF& z;#PbSW&F2vNli_r4m^~FK|~4WVy_b9f-1+O6`M!b2X~@YV;^Q+H#Ij`!u7%D_$#ZB zwtr9Z5M2{1hya#-C}M;4V_I56MD>Hh_pymN-!J|H`k{dvG1wL@Gv*)8UX3pM8Gqb3 z79o|id^>5Y@4urbaOg!6+?Of%lvKY3cto>*{QOIj#NzVpj8BPT#3Jyd@CN!I4pX0- zVBRX!*Doj_IqCB{nM36bmahEe&?j}KLX~})Io&t5*WKn(Qe1M?or^YmkyS~?KHC36 z(>D$7a^05G{^>v(2b)|l_C#QP*Ubvgs%~Q1ThHTn@NAXuiL;t_26ehR=daB!Zn(Cd zKlixf?}DFW;c2jL_L;Ydb_QdMkF@DoSp$R^WpL95pL72G(|Iw;@POb>EF>JOesjVQ z6!|U_ll%^!2I2{_ z(l@2y<5tunk8st_v|K|J?pgP_*jMcuZeCR*u9P+XYC0;yW7%-Y&!3p(FlS@saIb6h ziPxf{%&TO$G;=aDPPy`2iHAxqSyLNkKRokBUqP4j{kl|1?f1D-i#Jk{ow^#jvU?iL z=@XN^xqSpi%^N%dyHeoT0h#hu;lYc$O^jny8wO%2zU9#oaTw}${9tUfasMUVW9s4u zUCuo$5p>*7efQ1Xr|kayC$;L$al~2n?94nqgi*UlSt}~}s*Fp+%t@PH1K>N?X@vyc+h zp^3)@b|DsDb}W~*$s|3&4)O9$L-z;YxM%q_Eqt@y)9mYErP&<%_ZOL`FP&>>5zTXN z>zUL(mgO}MBb5zl*1|}_BQExy#&;XT{N^?8s!`tc5U9-XIF;#wSct{gG^eQ=XsD1! zkoWNVw=tb}gzW$?+pRWu<2)jw;OVMS$({o|Xs%e?6Xl+3=mH*&SoX9vpr2tD?nuZY zruRZS%gMcPZ)@dpmw!mOM|*#cU451vKU9I&1heT@!%EH;YzHPTBM>k&%NhpX8Qz9n zqeS_>?nxT>{&Sf3ZR~^60^sJr`)nTAfhd5-0bwDvdJtv@AJYd5sH*XkfnBue4cLW= zND6XVW0Dd{DKLy(Skr5j)~=Uxe-)pFc&8)^f%1wcrhU&ERSyWC2dNWFn7K=FUok|gcQ3qT+}f(A0? 
z_}qYi1VmIz!3}C=1Fs%IoaF}45y5Oh{bXmauHpl*lJaNf?{ysHXEk@!Q`(#!7`f0!6l z4e8ut0rl2iWhJh`Xahr~UbsH?ES`|LVm?EmuCZQyqV|_PF^+zrHs)V!f8(!ydwOHA z&?*izfRu8^tx$vk$7@j2i_%q_i0=hTo=QXx9@L16-` z7ixiEboh+6j%bcfwmsGeaYRI#ux<1v1^#97(=2`|LRE)gc|88qk3>0E0d8gg-P$=x zZ`UuVtHptekwg5x7fB5ZV-ex|uNW+Qe7GGEy$LUzxLZRjdsI^Q`I0ZW^%E&hGqOLA zl?8^e)ezDYqr~SqJn491(P+@CWKB1m*6)_weMtG>;(EA)c7MXX!{v|7DX~W^GP)m# zz39A**_Wz9Vs{oq>WOl=;aL^D~DCzmPmX+iAa z$C=)52c04+&*HHtt^7X7WN)0|;A>>42U1|^dX>}!ZSd%eU~4f3x;h|hh6*ulZaWWr zx0iUl5~i2>B394YRyJ{|(?YkASl|L~SSm|cWE=VyIC#SlOln`bD=!2B4;;*884hkI zVHOb(lcoIi@RgU>#b^B=YFhNyD$;8jJ{e0XAlb3vr{|SIp5AENtiG;pZr6~l2qn zlUbO5rl<5Xom#|_SErNpkEr1H&oeJHllOAlzW+3jn~_$hyV&XZY{*2|?IR89#YVQQ zMmj`9fg2}!5X|xk?Lf3yL&XFL!QcG)n8IV(F%(ZADe*l16`TEBgcIdHtyT0uW`}DrbL?b*aoM)da8PuA0z;Q$O)fF5B2BBvLtu_aS zc=N4EYVREElH_HOrgiQ|8C@ryqug#~wJ7-{GmRUJumeU;4c2l}rMtT>ioin;{d_MU zGJ8GQP~9!H4$(PrUfVKJCf%#2yb_e5j}O#Ik_B5-fjrVr>XbV2QA>`@@aH}~dXEDR zcxxb9MXVQ*8Wej8bCm+vd^AJx_RxVose}BOF-cZTbScZW!Tr~BS>K$%o9o&hVgRXq zDPtIAyYnx7+KUB4pgjE7flzD^`GYU&7!^dbjaO9nBw`|PzZn@SM{);*1|g04eP1c6 z{sJ_b5R5$+GcYB5l3~g~%Gm}U-U3wG`{5C7b~En1ff^3sZs93oRkP|^y8TwkTG3U8 z-PpyY!j_rA@`Sy*k$65XX!PH$I;k{i0;SB>o*(CFGX(>h=L;HL%sHT1(S&$J?Yuug5CmHR(bXp7dS#O6^n&UE66*u<+a0QCvIsV zaZo$w`O1CeGaEkzSavqL^5js-L=4jN#{xhrRfpbxMfd8bjZva*nB(MlR#*zprPao6 zDRI$Li3l+@nyGiOcaPC5BOY*s%-^bx^rR}xAp$&ob_!NJl)Chj&^SlB4*Dq7#*j~oLZ zMFuJ)v`K6MKAtIA0(m5?mw1ODTZb+(C6NBb{KzirI8$#m?k0x>4G#K3I+B%n$~FC1 zOlFu>Q`nnA5$@kp9@}ea;n*B;ONIDwL{hh*s4^KV7Xw5j9Y3jrbHMi_{P0R>g z-}~EkN5mIO@hpq=^M-$|MNF?_Oih3~e&!CghWe8=(3MDCfcW%HK2mS9e?4zMvcxLH z)eewb9H`oN+OE%h&Y8f3XT%@0{m}bs7%`CvWOuD@0gfVI+!L{n}hq`(8;gg_-y-i~M_W14n{7`vxg9eNjU(3Cto1r<#27pUf6+V94q! zD}ol>bJwqMKd(X1qsJQTA4{zTA=tzj>Oclgx+3o zgt$zks((}ztDLu3-Cyh!={2*sQgpa^AQ2k9=uLq!Fp68W|9(z~=*Sj;r6BD6i<|Bk z;q@&L@c(ST3Jx>3b5Y{$kB*1kqW?w)oILaIoDFMwAr94^b*L+{%HbJeuybDZ`P4G> zak!^Yr4a2{`e`0tn9%V%U|-~=LN>$9nY`MjTx$oMvn|G#iSm}8C+n^l?yWXd-?**! zybvvD-FIUBNa0aOY;0~G<9%KHqL)|Y^q)4@y!o=4sYLzdlHW7F+erwmps6p-8iTVt zSJSJa*@xvW->igAbkP?mhN0S>ZEUc#qN9lN>y|2$q^G)#;FCFKQNOC+U6T9pJXM{( zoFgLvGXYU~MqCR|&UPxHEO2f+{~K-*+(x)k-JhlaGbNa+_hVg0gv-geXzcB5zq_?| z$7bwO6~*W5NqtesRLx^;j`~J37Rg>uL_-q&KJE%L8*iqtpHk#aLCv^9B*}PW$Y*YO zZ%%-VX-6dnn4}Rsc$?ER*#iy|`W~uWj%*(>l~pxOOsB_C?4R1vA~naFe)X@NuBg-_L)*L? 
zm>PBl5k-8r)-uxD>Y(`1$j;A=I#`)($*v-2yvTU{2CseS-5cLe%)YF_)&9m?J;TPAO zM#-|Z3fNsxLura9*XQ<3$Y3xlUH88Ggx+w%7kU^Y5#!@oL33+k z+wmQ2Ssc_}=;RpkvlgmEfc(KplelhU(~HfuOHHnK3X9&P*M!-2cd0AHyh^Pet#G_v zDAHjly8vFGE%j-qtwQrc@?H!l_^b+JeS}~~Ho=bhCnhf~@$}E-=o7z5`NousMEJv} zDL;!}p87VgVl~$@B$||XOCLmSBB^0g!;9{GpmB74rYlxn9}}y z=j}*vjdj4HzVI!>Su9`ez}XC zG0A@C@>%1aOf!Le{c4|oSrRGp_?gEQA7I)KXq(8X zVhlS+{jpM&iV2)KR(0ZU>iW!G;tc9`@i3p=q9*hRpt*C5g?IN{yP?N!& zKd|Qba9AZ8x1BWdti%cutCyA{f;e`Aah9tp zWBB^eJM7TtOjE4u^(TxIY=)vUP+B&5Y}W6GNMR)1bQGpVH3L-{!Z9prCUejy7Iv~I z%GS^>?GKJQh%je)BN|YssX&3JdZgv?zqpiU?Vx(-oi%*R{-N%%$zqk9Jx_FHmvZw# zx|#`zZU}QAiEzGWnoBNIcs-&Iv0M^_BNgvDr!^K4BT*Y{1I8>9wd~qr5LZ8plX%5o z<>r&{)q@ntxOeDEgPA0xuha7@FPBgc&L0@ziohiPa85QHC;nO?c^uiCckU0ly1^TN z8#R;}Q@GyM_}%4Uz0wsI-vYmX%Qc_3sTDzy9ei=jnuhb5!Kjo!1-R~nNluyhHP;OXhF2q9P1n`+Mw)dTm-J|yo0z}13Mh735w%O=q6-RFYcC^5)d^@HE z414H`7aYpr`zAJnAj$!Vtmm<{xf(t}M)|Q~#m*eNG$p&Zo>q7Ur{2w74(BX+&?mkX zjdpS-Jv@4qj)Xih+%-l=Ok8hzcdrMDbvTpp2=bFMaFpfAp)L%;pS$828ZZAG~f_N zkX!w$egEUPhixvghV4bQi}q~+P?u?X<3UmuU)eO>+vcCllY?SSzpBr77=t9Lx_vw# z_EOK9z$f>p6q$tx={eFB`Kbb_ovcb1I{eUaJn_)JxPhv`bznvL;smhlqHUHsc+CxE ztV#NomOJ{zquOc$Kcsg@h%G$k{n+CnX>VE&D<2qV>iVR)4;OeJoiz6JYf5!)Q#YX- zF*_EV*H94QeiK}dd<4O~`h8}$ps;4-QCR?94l-Y#9Klh2QJ{CEUbGRPV00<>S)wlo z4k?QtJXEq+HyclK7~0)9*{N>U)cg8Ro%>?-7J%}obxK;(!f3^3ZnDx5UD2 zQ@!_c2?G5~Vy7dlw^=~&lU@dLp)nlyYSn@6NZYZ}q8V(Sar}|O%&c6EQq)*Q&Ur%% zGNx_HTe;U5-VFUYV0}ctJyOxDtMK_gW%W>LIklXI2W6*1aF*Fv&F^| zC$eGrFFQvxl*yNoe6S{g=x_{)p$Wm&Qx7@U?J?DkIG1KjXCeP`_g1J{`j^Q|# z$*jEUEPC!^&8dIpZSq{|KL-k_G)cI`WI(+{lxNyJ*6zJ3TXzZTPJgX0x4QqO*KAp6 zZkk10OQH`c+MWlG;l7woGo0-tT)_$@M-G@pEV7`a7T<=lo%d>4=Fw^)Bs(zaII}Sj&D=OI44!n!96(>8H`qz=>=SD}5{VGXRJHac zXaCC*iWM^ykCWxizilFa1kz2E!%K{rnND#b!s+6v?iKc(C_2yXUu9w;cYi2Co(b z0vIBRik-(C1*GgeQTviaEIj(TvAMQT9~1h;bTID<*RE|{E9RIv3Slq zvv1z}nbC#oAE%A=q%C3tSMPJvB^!BxoUS+>2A@mfmXqC}^t@cQxo_tN0)@+Acm2~m zh;}ps=xro{4vnTUUgD#`4k=$W7G zJRa$&*&i-bs#G<^o;_>m4p*8mO`JN2Ha4{#T`ow`U9sNV&u6$`#hF7Pm{{~++!eGPHm0q}#1e%F=1EUt&2kBNI-J4}aTRbymycTiwR}aC zxpuKu0q#2q(x_RANQdThJW)11-1hXwiJN2(k>;cF8Q3&Tulw9*_3EfwEN5sC7G}&~ z(Q_gr6Ktf!oc0|;au7afPxDStEHr9#o71(v!T0QHNgtN~;B!dUMkI^>dAZsADgW8Udhg>en~DShSd#r~Qxv^|HN>y>7Q~UNVNp}Hn2|d)>tDN+5G$Vd z0H=C(yg4DN+<59&VOFjVCEv-0NeR8esi(~@HFrR|VSlKz-?S3A4P}`_+I=2zBt9j? 
zsptq$qwR6Bo10i<%kZ<*qVF+hlb)#uwC#!Pn$3|??F#(9`8^ahM(ndB&VheaD$8Oe zi|fgFcQTPiwi7n2E8Q(gPgHJ&$^PUYx!SE6v+pdxf-&80MXvYKW4$EIesJqiTzM$7 zdaXe@g5YHb(GGC*a2rt=ufkH|uo%j`XGDqK>p&SZliN%9>UIxRW=Z~(IY#SNEh{hU z`ky7^ygk+Mw*h33e*^s@7jdg{zfxgga%GQa{-v-lH}jhxpzmDL&e`{lf5I)+E@U7` zY8jkM8}tqEE>_k+EmeDWaJ;J67ej_iJUtgd&BTFg{Cy2|>1X{P_!m9xOfmUfY;;dt zZg;PUGcUIhlH%1{7M(^&Z23N&XKxYe|FW;PT6;!dVX)xzI>us{#E*zd_RwqdT+->8 zrg%P6RGQmcc>~k+ekYG;VQaWCPHTuJR>cJ6Rx_!Ybo;q(*>MDzp4 zZHALq!=tMYn(e=F)vlpTMRsiGrJHu#AKnp8&ilfienXgD^=|PswIFjdAu6TCb2)Su zzGTv4Oefz$1~~O+!d>BytSl#w@1C3GHSDBq^Y_;+-r$ka_9I3%^ zM!CG8#s5WQOrwLh(IA=phe0P<-@zJ&UV4<76WuYda+*YuczpiwVTsi6dqcO|_nHm# zrG(e_A^eNDDAg^%w;>B6&`z6@a- z6UrZQc7IHI9vr4-`msZ{L@v|mnt$3S&FUxe{j+&RCFf%8rb_!f)vb&gp{CW~ESRk) zl|kn@+{q-ScM}B_o5Gjr?X7}GOzszc>EODfj)Cv4!0$w2*QcuGm!k0>q&ZH;UIb54c1q~5gK$4&tQbu6F==B&?r z!+4xJT=fEQB?7Sa@zBk}Rn&CjH>Lc~4!+%(PPThaV_tVKX}Tta*xcp8otPi=Z<@iV zu;+LAya|;4B&zsec>iIOiI=@y59=ZI;|VmndlCUox=kF=>FijWL3-n3gG!tn$>wbM zgyi`h-;HKgUNziwj&L&l=wtSeTe0VQ$PBahA0;7nYQ=K!8t$<-vuDa%3&A2G7M9}{lEO?9Qfhvuo zmkuq_((pOJc!SgMwMCgctrgeCcBot<1g$n?*V81>aL(D338!68=1#jVg_dO_b+2ak zgbOR1ay-ksDuvx&yrqY`yKzsa?`q#ltr_#XXTzLv)Y!8*vdYzohEBkYv8<}FguTR}s zkTpHmn|Fd0j`I~=52;r)oq*kds&~&1#o2;fc)Sd1YIRG#L)+losU zMmkS_?D(`ROE%K_;ybq#FIt(S*DE_sYV0fx=i)N}N)pcWJ3i~i08Kx;%edgcaz8t} z61|M%wY|>Eta*{Z&GjKJxdzu|r<=WAP(OCigA0czmkJu@C-a|m+&|&edvf`6+DBtf z&=Y|ItouX+!lzz%Hz@IXteJX;#nFZqPtTtpY=1I2V=M(#NDe+4)99(}eZ!TIM3 zv~}9%H6;Ty{rTfLRw|Fn^$y#7P>rsP-y5P@7K%Jj_^NZgyTe%Lcchw@Y2DpN*=Sn7 z+S*!ZCES8({#LfrpxWADazJeF%r=AIS#2#kq>8!3_eVEKAG9WXl)82=xG(CxBL4ne zmayu+Gb?<4kN7SV^@m;&Ia!!p)3sb99SN)_DQTn?oYnHviQ_8=wGeMF$Src>7?gEA zXpO_4%57=Nv|D8rbV*HsD-u>U5z%jABT{NLUA2T3BU;M6Nj++mFEJW&5BX zo*PHl`rUrG4<7v6@chP*Hef&75v1&#I14qcm2!zpoYcpHedep&Bkc!tV|9#;jaqU- zt?Z6*HH28_vVb7B;^(E~RkI}j=Fbm8rhEx?w|ge*V8l|B@Xcm-J!`IZ-DuD2#>tJy z!ll}vrWAn$XuckQW(EC++q#Rvf`-3wb z^O>`eo!0oe9+RPv56&WD;{o4BO-@L*@xUHJT{k{fZf&~6fL*@2V%h6QxKyYhs5WZ= zerWAQoZd~N&{n5fwCF~+;Efmd$VIz|EWOgamY6(m^HA)5?!NS^3g;s1w&OHBh(il9 zuFhAu)VAt9|o4mdh8`d<$;>5Qb-x41>s4<1l!FcWEt^g<<_Qd|LeR zgn!C_UZ2au4!ViHG2qw9V}32&-4T}dOg&^k0<`HC+1pwZ-y}@xj%#mJXXS&od9{T# zCG#-yROprF?r%Fizq;>svcU+w}p+nW$ZRhPt_FM{y$th8U zeLh#Pd3%;H%}k0yB$yBXaMJzNrSatuKvh-#iBshp(Z4kcIztb$9A~BwX0wdd;C#0A zy6P=g@OYK94@)^AUPzuBPe88^mb#7ZQF=-tU5e}vX^WFfFVc@4I2zK+aq-UGrY??i z_(9ErhQ3`VX+@k`sLw_SgYsvcQ1_ZU7gXS@09yfRfFnYGTSVUAlseR|s_Ve*At&do7m>A#H4daCf zj=F_b9G6VOIV2{=F^zxtrN8FWGukcM<0DxLUkjq4tlp$n+(OSYo7EXO)WOPd<=j^P ze={%0>YP=G0;}C&`7|Q#FJ;7mk>2<(f0f(WLjLT{_dSTvF1wNcq$=$%8_v6$<=D3; zRAWRPqgn#I91l=Dyb8lFAPE| z{#|$fdN;n}F~kiX(VsR7rdk3W&)Es~JpwhUJ`dv1B0%Ve&q0`2TBS|WeW5Ux^bDDH z`HPQYzYd{02};cZF&43)5rGcDyn`Dxh|1Fs32yMWvAjNdp$^(sS9)d(;Nst?t#G#0 z=9pi%%R7x6Q^gbZxkhQD3embLOR&DDT4pZwkD?;1UPW%6#=Qn-MTEVTGTtkZ6`vA8 zX-Hgz&_h5Na5)cDUOTycmASp&fNiGa{^h2d_Ae8M_BC4Mz1KZ`ORw*JTS>+tRQJI# z^%2prLg=3jzkpI|Mn;tio2N8LfzQsaZRVy|8Dzkg!~b@H|81;3yOImh!Y6OMt?D?a z_<3e%zj&f&xuRm1*R#Rbb}EwV>#cfa_sV&ib~RZ-w=$g=t`E#-ASG#}xpVshGDJG| zRX%PA7p-xAQ3q|#N1UvmBNgHTx>k3MWbC=Kd6ce5TY$O|iD{RCDr~w}=Osb@ZwGXF z=H&+mc|TJXGj818_xDv(KyZKu2+u;q)%Dp(tm&mjXB1Ec0z=KX7KbOF>8+n!D{8PY zz_lI_+(Z2iD{FxN2OHuO8d;N~L%wUFTbb@;6TR|b80aOZ?zR4|ijofEClC=_LP^VZ!PuWAdhHHdO@<7+MK&9yl_CP41k#r118 z;r+x50P=^@B8{BDyE1FNVsd6oqLd@?xUlKKP3J>pP3eX z0sRVE+i!v^vwoqmzrtJ5HpNE&$eH@oSKm8ImRI}7q}MEn0Ka`lXdJF9+V z5NijGnT45|>3O{JqW;S9T3jMD-eqNfBHZ-|TLxI|WbI&XmxwgNBXBy#O@v}>+xgnO zvW;BZW@=8Tp@%{f1!?I{&a?`@l?q7FHR-Gs%4J%*`aFgV;uWQ2G=}RyOXZB|ahd~# zjo``o-4+e=;<4YpE^nkw=B^ZHT5#` z*f*rA98Dkvu(~F`3%D@&abAF|ni5X=pX27_Ce&L?cE}snee0XoftRPOZRE5MnjdLZ zF!!IxdZaW8>mu}3EO!S&5hWzmsl7< 
z10hbgR3}*yH4sf2I+Uua;SuN%H84J*2gk8x^|j8Q52$(mNa&ZT6!)x*KX9V}d2pxr zw5Gvdywb9IuK7GI`Twf&-fSGA(xzi!bFTT=o zn6*E%O-;pT94qMwbKItpZokD{1@&+ip(jZIAq@BoepPmQMA=x3p)w&6@V`4g!wx6@vAK`$GzEu2tv@ZD^%baeEKT$T^JhcmToc2`NV8R6$?i?|o zshT%FE~3)A)f`Mx9IHDn>`-8n(o&F+SDN-7WgFFxkLJ~u%WgNnyKQl^esbf% z5t+S5b}3!n5*v1|LTPX10$9>WLrNwQA`L>`a$l%GSm1~8JYOq~h*^%~uVqy&Ev~s} zZo5PR7jFztWtzXA*=gss{}x4^NLj{&QW5cdNbfaD9S&eeM{_3Fj!7X^*=r;?PG%)C z^Zb3K;f?Pzx%-b^?uo5d!~?v@!irgmP>0I*SZ}CwFys^Oa=i1ZIS3IOdLeJK9A3KP zDBa#*MD+K#@VFHCOvyzK5bDzN6S1J#GQ(s|3TcX(_gO93EsEHMi7EZwH+8) z^DFNweew>vX>@CuTj$2F)`kY|E|tE6e&U3mpPWjluQ>L`&A$qO6rRBF7NG# z2L0>_op-!aa%&?Sj)U3{p7dgl)4kCO^+NCM+Sks;_3`bVn|lZ zs|M45gxbhby;C`~#3rJ0NdFLKuepUjzzcf99JHsqEq8h|^_r%k){mbqpm(S1zgB%& zNf-z_HWu2`U9b1a*{e2TCK;6-iA)V_GCEs_ZGnJC=p_@oC@;0_>-7_)sbtIRRsX&% zd8Ijm-DSb$_gXrnF~fxDc!`!zm{>pq|S z+7y!xwx2hCu2GsB?GRb!QdKj^ z|5|c>SUA>wJG-U3*s{e|KKNTGN*}aypcQ<-)Ax@ z0yEP1xdI0)W~?Wiv3!x@EHTiRuy%P zp@Wccz)BwRKpDRM+EXtWN+gM*A6D<5ueq4>8b76!Zd`-5nuY4RC!>(g8EgM?T9SzK ze&u=O06K+>u&y*|a&{!_#aaM5PcZ^`clgg=;yYMEXLD5R?9f0_+JB>W3^aP`Z|^Z5 z3Bi7BmoYwY2OH7s`l^KN`c?!q=L3O#59_jRJ?|D_PCZpNY6v4YoPnFM)3EbDawf|p zbX*#|`xpsuoz=<>T)UPTg05G%rX;qEt@5VXHTcnJff6^%38qUSOy8h3KS7(*l z==c^5L8qvH>CB14c;>|Os%2mgD#0mvqLe9&Bfoz~M9ofmcV$P{ov%@Mxu{0P+BMgr7=>cemWyMG`8Wc$@5RvDy9w{2ER2em4uW1oCa*a(l(=w!-z=$BB zS-w*-AY|SkFMD@+eSArb-h^s_x}na|DTN)f4|kfq*Ga$F*4eG0n)BEnXdcP9vm6f< zaO3kum^>{Tre|iJN7Uo^Z*^vzj+YX$-YB zy7l~8{Fa!TL9-bJr?0LaM*C~|#+W9ev^xZ`I@922@Ob1>A2gm* z?68{|x)R*We%}B1gD~I3L`A`}cu;o8(zU5s0+*TxICXpx%i{2$;5Q;-S{{*~EcV3{ zi4olWQ8$WDJ_I`^>~7Og9Q)@tPpNVboZbX3C_;@i|OZPE9OxunAKV z+=}DIrF@9!XTe%o|10yoadTFwRlj$Nz;Q#SNfy_P8=L+P{7+r74z>I8ayIvFIrO<# z==&&f>#}+L)gS*R0Wy-;1K|_i(<6ucU!vM!x3A-*yS$dJs<`@RIiW=hhAc*3x}fRb z(Vkt+la%S(;ja+(5d)>R#n23uLHY3WtOeZCyHzl$#mzRoR{Bk)m^2;2`Bk_SxrxkN z%E6FwPmulBPxi*09ZRGXDgfYT>s(b(vwn>og~_2;=7F3W=*<4*eXsRJ%ln1`X-o#A z0o#oeZ%&T3Uy@w>0=m9vVMs&|td7r@f;%GooB={x^79W4^d(x8ZJE zZMl_OMY1a?mOu8cq@lGlH-=rFXou5E!N;-VPZ_+3+zGAzoQPb4scC*eDQtl^D%^JK zl%N&rgY`jE5B(DEAdc|WBlWzLgCbb`GI^K&0s(wU_h#5R*T~SQGB8_ zQ#mO9ch&fN(5C}eTk(fy%#yqPzLyPO1j^}hu}^mt*7|4NQ&HV09KEly>u!SoSkSZo zCduz^geSY5zX0w-aF0=SD-c0|x2L1qE6w$S^oEU&-YakQs+Plszv400KJ6W(d3Gc$ zKCH=T9M52o9b~~@s{l*<3oIQS^T3z;&_H3p^|E2_SHH@|o+2&IVHlDw`KX9HYT z2C^c{coEq6!K65S5M5Omu#|?kVmlS>z2WrO^)@ZE6g5A;#Nc}M3}TwOt_s^i*X(Gi zL4bzqy%AlH%5S?q%u3_uF!NSN5TTST;xW2S1U=8l^DdbtWgB8GLMtcJD92R*cYs(>C@hUHN6DvpGR;VBIM}Y;@@4ZGN5k ze#X;R#lN7B#5rS%pUMF57Ez;>@yJ8luI>PF9gL=kb6LPTPo75vo5>9!yVE9Cbiqb- zW<`$IZJ0N0Hs`ZzM;v2eI_3nv=NvMJz?}}O&xq_a-$i4*mhgS-AMR5;h$*D7TxmKS|uDfD=R$b;cbbLvxPr^@=o z`*3HAZ%c=*L=oTiGMM%LgQs9818hhB zbw$%{@|0JQG+%)`g~$$ACBWAP!F*(7LjzEZf?0YWzraC}ItgUzMS(l*LN)27{JS-k zy;okO9^*ZzyV&tW<`xqrbMS%>T}h!dHc^F&RQ^KV%*O~eE!~0 zHd@WmCWpA!3M#?VR<#bn#Gr1S4l8t4z)~2{Fhor{xx(<1CyE6#1)xfg{>;$AF#E{0 zOc6FcIYu?@v&Lzoux;n(7~29y-*v}{lMJf(;@RmYI%SjnliV*+io|jUYnW1r9ItxS zM+<7ZG?9N!hWiM8R~9=QkpITtA~rXDC;H!NVZ52|e4l=-WIDP~7~LsNn^SUFkl5J@ zBsWA<>&~5)4bWcfu>5@yxy5DMKNI(rVpHn`uY0Y|!sC_Jv^%;bX*i*d#ZmY8&`8@A z2oV6l{3J7$y^o_M@ztJxWtww7x0M*&KCh9qlj@+>24I6P|kD+<7>9!8{CpQCm zAukw9=Pp(2(X#quHmi$Y5n0hQ_U(StO(IJKQSnr?7lL0FvDgqn6PntPnI{a`5hSM& zY4)Bgq+i^2h?$_88Sx(@rW$Ad@LAq!PuZ3f2g*MSgl`vm$#W*{@`J;H3PJ_Gb{u)Q zyoBag1}!+^yt;9l9Jp^Ln=;WRF0CSH=?qThx`BND$_U*ku3EfJFQ?jYwlf!DLwz~E z`<%_IPwbNq((;x2P`T+%3+I{p9alfBAEua;2X5dz4b2!P7= zh5$kCV38Vdtz;$41mq*8%nmvWX4)}za^?oOA6nNkHy06>g0bki&t%#rsea$RVLb=( zg|iQG_wU!WbS*NkzPa|k+4Wh2@@e{`&uTI4zGJz{tK3a-yQg!>E0VeC*HY(Yjgp$y zAH0xk7^eTbW+_Xoo(8%0Va=iob?<;qIrWzS*SXPbuH=+|@$6cC zR=aMLSRFaFm>D5HjAGzbKt8ZgrR2#0?Pz~{zBxvBr!p~<$Lj}k5TL?Fz-3k z8#Mf@_x1lxm*778@g^LL1K|cDm< 
zhYKs|(7NagrsQQ2C`n5{6_5qaO_s>=$lUqpqqt)o_YkZt>B*0jlyHoj`lwgLwdo1w z)psJ=fnyOw$OW;}O;0UL7(+vSwV^3SNS1RC5OwQ)YfUpH!Hu6c$7f5~M}pM@wUQd4 zDC9LU=e1mCqS^Fd<87|SsmSVc+q!l+A2hONK0w_!4);6~N4`DV4;HY($c@{U`TDyO~;P;-i!&Vr!Z`GA z+Ty0f!EleoR8skIw7Lka$3dafOo#~`XdYA#;rGyb80@`}@qTD)?0xX`*7;F!2T5`< z^Y}}??+_cP^;hNuk;dc_>Ax-0*V-t)u=cu4iWzhSsxH)^Lg6fN94%awc6`I_v0X*A zYJJ?vWwqy)V@(<-Ykq@pjzUN_l+x({o%hsR94hUYe?=QD#(yiM>nBt)s7x9o6b5Ha zKGuG8@T?kbqj32-Apd7$y4hiw;fs>%TN@^MjWHD75X(~!7Yd=li3Pv!Mz?sV7bWBl zU__-C9f(o(dhPU5!1V_S_AyVvGN=C45e+Cljl6&G`cij9r-Sa>zcScP>HA1NpT>{% z5!zK3G#0cuAxUfXI#Irdf;iBqCPu98gC}8IHWggLw=n}$e$9^5S>9Evq%aefeH5sU#+k znJwDjStdLWofJ=oIK=wQbX(A%ggGDk8;A0*@Q~B;=DGJPdw&-3)EfxL)uNT-l{l1< z>{zR>KU2~uILS;V6{J=%9Zf|NzD+)kIAJt%*(buW{1PU@>e7i;*O^3J9F1Y&O`-p$ zFrl6IdbHHK3J4=32~88@u&b*7o;x^}k-)4U^(O^RHB_Ur`Ov9&T3nrUfN569X8T%M zSpn8MyXS&!uPB>}cz50|#jf7d{BY)hNXL+P0iPhi1|DC&;we>^$ z4~nY!X6QMSletnRq{X0`1#Oz`-slh!K9aGBApjJk#urja!wI^Z!+&Ld-FOX9AGF+) z(0igSr}9xa;m_0$B>qC9r_$3S-od91H#p2jRvnqVS~h!5^bFMgwADV;O>k)Np2q!I z{|a{Nq}+;Y;2!I&>sq!?I2ZDQLHzRPl)Ot9C9p)bPU-hDkA|#v-tHz%o=V(0`@K zqBmS&W#N%>e_bXgg*P&faVx_-)3k%JCa_3%N~x1aMZ9iNMHXm| z*~<-S&u4DiVumYs1D->8nw2`m2s`&}6pM7G;cHEy-)?>k+zuJ+Q$2k3gpFKS>-*vm zx|@I6@xAo$ryu^xcu2mLon@*M?VP>XlUQtHZr=EykXn*g#h#GjNAb$qA?JZQSvG1) z=+MHN+P=~vj(gy}?3>#Z6h5{kf@MZik6<&8r`CMu!z4BXLu3fjC0c>imeekL&IMxZWkCQQYtR|<@*{>MSiC-{@^ zSKZH^|1dESj&)dj1xZIBS1V!(S_UgoeZljuKc;uVzN;WKJVr8=GzoBEA(gJIPmlF5 zqF9DYFZsU5eeA8cf$43S;?;s9uQ@Np-r8sPe%@OCO#-!@J6hG_#TopRwO`qA_eax8 z6K8wMrZ!7LFf|tI6P6MB=!rnjI=$i<*Bo+IuAn=(L8~+XCq~h^9iBNx;=0xN8YuMR z$s5fL*IUu&zjSsC4bNRytv%Wm^MwoY*8)aNVIxAJVs91s0Adi_8^N9%c7+Y8De2eb zZOkvibV?+B{7~wdyBZ7EUt2iI+)BECzW%7A_Pu}ipZoabY!M_ce-&Pdna0p7Emye` z&w37Em|(8u%TF(l@m+${u*5Xo9fExqcU+b``gxm7esZI1ddlKzK0(_@sD=_PB6L>u z7Af?b^({kuO_EU^Q$z<{tFEf5T9!L~ar^p<(NRP*_I>;4@{RLPLvv(q1wQpKhI1FO zn7P9B!TF7xlq`BkFc-w9xx@5=y3M6QdJ%(96l3Ir>fMxNKKL9x|8=KIY6R$Dscy40 zMrR(K9LM*{UtC(*$eJF~%j_xU#+%!oXKhj)UrT_Sh?%Z7EYj2Z@%^2Gj*Bnp=2R;L zW9PYswa!v+*%47J45OEVp%bIdA;pu7`Fvb#DUprv*~sVJY-#F0s%Ggg^ts~m?Spx- z>S$1jpm+);NF$m#V{|m9=Iv69 z7wG`aZI2*~L<rIF7&J=TSRi>CCtlXT_ukw#CWK30JvHeSi)6S&*{F>|890`@j?*?eHMk!OsJ__p=m>?Er%C-iY6WWYze4gclzmcXEN-gS|cyT zUToh&W^mNMRQhD?t`5@BvWmzBXj7CRWEEs6nvo5ytC~1vwz)ZVL%?yALsgz}37V3B zPu19R+^ON2YzQ^xqxXdmkAGW5Yx!j_O}jif<$I@OhFm~)a;_Mt=I^$T_&&|s^c{4E zyH>L<(I}=N)!lO3nQBFN$cDcw_lvBN{qc%ad91hVgE~OVOBFbgUCDTlHVLTq5kZ z2@iDjNM|3&&2iha8gM(tL!E@0=r%L3mb=IO#%)nQI(dC$?ZjY2jJMHOTmvUsd1zyO zGwRSv^cJnC6GDO?{ zugsoLxfJ(_k4~{O*&#P8%1NRSQVhH^JoL5E%%^C1;)4Iez79_MvGh82CWoE%{>xLl zV>(ExnqzhpX3ySlys4M!l!|eK#UPyZczS|MUWai9gKGZlB6YB$b-B21;*+2FAqb|X zR84PY>Dbx3xWG64G}|kjkTcFLw`CSP9H3NV2+1b9T>_ObMpn=d zK9`BzL#oCG7b8Nw4Id*u)%XC_D5tqp?Rx|IH`)l?-@gG*-u<|9dG{fOThQp{WjC4^e%nWbLeb7N<;Mi!%(i}(TFVb2D@EPv8=tDEW*ZQ z(dk$MoUmXOE|?bsEe@y3-zg-dG`Fm=P&Wu{0eZX;zewn5q9Kf-0yQp>9j&=C8wxntx?*$qKt(<|rQ0l3BjV!}){^PC6cO>Ms)) z*;nf2lAty*R{)!8a8ThRiSIz!+6f4q1nI-l{@^c0ga#eAiVjr(cAtQ}wCxIf;CmzY zBtzKSjyQeWDQ7~viT_}0W>~=d@N%GzC*npL@r3KZHNBr~RHM)lKy#>>6tqx$De`0QAXn#_-7iQBR-yox;qvj!ejl+%5hk z883G$@N`FCls>AT2y^`B*0FJz>UdEHb`yXmdHX3bSP&Le^mX=8L|jdtFmd>+y{-0| z=}&s_fUp{8@OH%tm5TeYFz%_J7vYM|_HWdhQOZALWxTV2Kxz`X!Z&@~B(Q4cTof}0 zR?Ri&8#n-v8*`wB2JPs!%4KCbOebPkT>QX~`=UGP!|9;&viwuSwUZ`EQ+a=uS{-hW znKEnXeek`|CCDTm4lk;!`*$uSRgZUrT{)E_rSd6Gvo*_%FG|g3USR9)sjkK?3UFR} zVvQ+)-UU|vfsrHBnRxPIx% zrSaXmHxIrQsX;VUQm^jjp3jX|m@hIs?UL#eS%HU?Ye?l3Fr&XRy@cYT!qy(}0)g36 ziCj(8mO%1xD;hd|-XW2rj1Ot%s;~#?D?hD{URU1GQDnc}Rjgk!@4oJ9?j9SA%RAaH zFs$t1RQw>Yw4h)dqY>Y@-h?v?yQ<!5HuHM}N&~R+jt6JD{Gm 
z4qwm*y&i4g{OedVW%J8>?r3F8@p})zG9dPBtKonI+Ngg+7UNZ>m`*rQYkn9n1aoEk;98$2tWm+f3$&;08RxV+!Bw z0uICe%e$eP`y(o1=}l*-INt zuN$_kUUijgNuWk4ZG$o?x)Gr;rm)pE)ayz0Bx?>IIzi2&uoG|t4Ba>)F!NOob|gAp zPatwk7QveU8zZ%wN8%1bZK(@~QUN&V+oY(C6K*cb1-76(n+p(omw(!VBcFYY?=mXw zoNO2GlLm|r95D%A19*cQlz5|& zaW)DIfsWLPrFLc#B8s(IZz$JmxbQ3k5q0kl^w$_1uGBefNM*drf~I zYqzoK*`pVL2gs449pEBl?#5((xy8wZ$js=Ra;Lgd4I@GVtTK3VK)|!@qq@FipRKa- zPjb8dsnE7U{(S2ZcZ+i|r=$E!6gWEc0fD%&x?qXgB*Aj-927@H=d~?_%J{i_bD~pI#K6KDs$F%jgS@zJ8I9SRp|69Y z)v_4{pD}!<_$N7GR{coo9#ww|O+%M;D>dQd(VGzwGTgm5(`;J$#)&?=Cae?$wNM>? zS?meQMoa44$pyV`)#=?DH2R;XnKvhIyxFqYY;tYepWL(}lMk!7ZOH4T6k=^`r_f53 z7)=qOf3vPe40V@#3g#E2iO4KNG=LzyB_AdpxXGFjjGF*YV%aC8B5XpfbY8qfXu3hSwaQW|d+v;a;_(o#dwR9$A@ zuij?{p?ju{R*yfstMZOLy4HTW*P^R9PU4O}kQCIAmA-Q4KYZjT#I}j`wc|;oRFbZ( zc2T)A2qjdgY>LQ@25RZp>NrGWzpYgP_TCH$$v>6-SS8dH?C7r5X*-S+j-U#&s-wQ7 zD73E($Gu6HY5!UDcFfcikh(slHk(FOUxq{JE1~g1#H#N^N3`0V4OtdXR5-s}H)62C zNsZ|KdVq(P*)5gcx1O`SonW)PU8WN%LH9{T3~w>?ScUm_$jXP@%;pq)%zn}LBbTO&8G zXi0?w)638zL5n;`PNCWDxQkl&T+E8g=g)#=`4wRfpd(IAwfkD&=xurC)NYl~%=l6v z)-g;4>!*51n6tbwkSe|Oxff6WdA;fS0(U!&o1d(u*(xdBeWk;Ab~!{}PzfL<5$_Q_ zDX-Gix+a|3f18xdbVSR~{gwHtNAn`T5tbZ3xUy{jJMMh8;o|t?(*_{zzcPioMju1R z!DzjgCGVuc#Jf9{To}Hlt$~WC%|AQSXhHmYew=X{qprYSD5o#aNzVo!dc4u@f6OSkz+ep$y}eDU;U-cEITHDJP3|r^^W5h%oX=dBA13{y zl`|ZvM0PoTKKaTha{~30&%aJLs9N}zktDep;wfl1Jbp*~C)$wOSZ>k^VZ6H+pSgM0 z4I#_mSggoS_f`=K2NyoQt1h0^{m-}Nr&(N3yYFR0pW9UH*2dZ&r<%_EQ}{cwA@Y*h zgy-77h#0-ZJlkQvgqKfd09dq(QJ8yb_=VK*<);T9#wBy(_Oi<6tEQa$`E{^}&(;Bn z8eIeP{;Vn)w+u%L{x0j+-WH3lMDz30;~tR(IbDHgZKfWjVAGH0zL(GAk(@7m&4{r+ zDiy>HMnmWkcB6PulfIF5_*=U`+gzfldR39>M{giA5jC~h!Xj7cm1&(j?l+8Y!bX~@lHf|# zkz&YW|3`1KhG%TF$jgm>n5zyk6&QXz&lK2=eR{zt?QX=7@rl7=57!5LA1{0L4;LwM zOys2KHdjbfUA*1nmQkQUE^bA*X@Bh|WmPU*@)&RxSU_29J zquExf{q zGJgNCn8`CLnV25|d0N~R|BL_cdl><&&y^n6dcQipE>7kmCxYNy5T;~V^N=hY|0lg; zx4}BXrsI7!3L@XV?@QL~f*SONPoDa(GrWgvzzQAM7!;9wa+b~go)+A* zE2S%ui2;pub9mG|%mz+Xsy?{u)^P=MOpu1fJs$hia<5q<7h^af^<4V^`O&}4zS|FJ zIC(?A*CymCOW=-rX?E5+uA?hm!|A)GN75tn|H{n&Qp#(eF{>-@eIX z6DhvQa}w~TZK~6jR{PeAyz0b%)8FWq=>6rD+@mBt1CkZ~N%FK; zMY*{tT0DXQvw+L6zM?&@l@OrZ^d~*%+~T=R5Q&J-9(V~Mm>EoYR*iwPOd9%g^NJLR zyx5I)@1=(Ig#_+j8EQ3YK``a~$K@AtbQqbeQs?Ddmg zM<(L%W?#medqjW#l>-@K$t!`$Z1RLVu3471MI7&El-SyXE8Uf<_A{>9> zQ)=L|BGpWK+MZ|#t3(f=FMxI1?dCbLZvNL$&wloid+3?}lfyAr+_qLc@qR16%bzUs z=uv9i@D}R*Cg$in*LVHX>{9T#CnlAe(ob*}*&f!L4FI>K@TBn$iM# zv0fF==i=~L)meE(98B9ll=rdu)k%&68=ygNY>Y~mPDU3ynsax!xlrUzE*ib@%Dmix zaxGV|jm$}vb3t@KlD(0lz-rpc$ZK-Z?}*%rZW|4Cevv{oyt5Fk=MtYq&5K#i6?j=dypDe$jPhk^kNaYm20lE#GehcXo=+zU7NPbBTy z>v3PZYuD9>rz5Ey#}RSe9*kHjp^@CDmm~_v$_9aI`H#)!XsXL2j0rRbIkz0LPQp!j zX>P}z+>*Mbbsu@bzR?k@O-`e4q6aWQBA9!{Sb}ynepmz4iST( z^t;)-Li!7!)5A!t^qAylCePO4c(0-H{FgrxF8)b+g;CZj0mXQ=OkFGXodG<0UNyGX z$IHLee%;vx7P)u+z|!jkx6Tj>D}vG=z$O4!wOld41g~!2Yk7rn0xP2_hOYUmTN@lF zm|5O{A>tq50&;+yHZ(ct8NhL|!&K}Z$5~}_KVFAJPa0iIz1}SG-%jrRH2%1CDnzW| zq59p%pf1lYT5Babg6X37LrTc7>#SpR#3@fAPbf zQMmMO`zhOj@$g)#%vhxMVoOLa)&VsD5TsO5J@wp}xg}}8F)tAkEQB28o+zKSN<~SF zrbM_~<%hRz|8jjBRf&ehnIAeWG)j<|xF5&xq*3fnZyFS&bCac|xmXP^4OpUx2(+>l z^%Y7JjiXh&!{b|HPLj<2?V|@{r*@gh=zC4%9wq+F9#TW5G1jXoC??av8!weE=YYm@ z`qXF$V^T9#1aFydNkF$ojud?G4{igmYkuGrWIKBsKzRH#$A zK>Xxq#;P`Ctx?hFAj$yEzETl?eD8zwt)*V4?5zZmt-uFqMGx{L9~&0m;?*E1oI;^M zf|iioISzyz1d(gcpW_(KYzmp6?7=#bG-KC$1Umh-Q?;f-`w<<3*rrnX{eh_;#ot-E zp_MA#n6EhCYFUqGSou03t#U{)5#XioTNlq34a^ z2L_%!{Z>Uwd{z+8EFzY5+F6!Zdgw;gZC4QP$_kHoc0r-&dt)y9($nKP2kz$vdSfU$ zm1a?dTGyozg10d})n~oC=x4<6YfhcY@vd_FNb4L2aCHtfxZY)1LZ|>6fqi{FxmVz! 
zc`ad)TbBy$DaU5i=1-LZ)rY=qzOwvhy@AUlPm0 zvyZ_eM`4*dw<>+>v|du5Z?JreDGGKA`)(d#7@_uNL%dYHywnm_(9NZfaU?H)%ad7Q zA>XsQ@K@NbFxR_ADavuV;19)FY8Seqz{<7(G1L)M;GMf;LeG+CefowKwW0WrHVDLg zJd~Yg_0m^X^j(AuPTo(xksaj~lq*|V)3eMoR8e@p-E`a#uAHFTwlf9geH}dR7NW#W zC_MXu_lTQ4If#;F+MJ?HQxw5#LYQV(q*>h~mZJentfq3qPJc-9qq6~uYuD^wt{tns z_;ejQSOF?6al^*|gO{2^XW@EXil*s#M-$~A6ehUo7*OZGle=!&e(8HbgTGgx=(HC8 zk{EgOzcYNEHo3^?hSlq_uPMP|0)K%8sZfWFyEj#V+1gfAgdt*IR%g=vU#F1Q zxS)k|!w++$L+D*E8b-Hj%nvh6pDpQ7g{>fH`nP=t1KxuH_uQ#>=C^W`(da33a(K$? zBPxwZL#36Qt{T9z`~j8Zn^(b_DM;@USqG4 zDFc0anC0_zlPIKj?ObnLlN0##6EaQ3FAM;@XOSKSZbQSM!?vA7vH5M?L|6Ul87B*V zLE51lMWb==bw7aIhxww)dZ!!${gNjs3B}~O<6smC@xlGB#kho{zy2HDk>AQ?WeIg+ z%=ZnL>!_ocp?5U7&NQ)ents7TMuh7Km)2PUvyXg2Rw_qKXWc|jRn}(M9f%RZ@k)Ns zdWDUxAMJG6m=wo(9vQ;!8H36d;3*Xj%(H0p5EWYk-Xz1#)o;@|SAV%??*@iP)_g~p zdY6N>i&j&U#xn^S2%-J*&ULRk^^x`Y*HNg_!K=4efrDanVG3k(ZBpm^LFQw?!|I~ zq?txmqI4k);t(!tPxrbvZLFpMj%GC|npRQJl?UM-gu)|C``2_)Y#zzKG+oMs7 zcl?y7Li2Q{t@G6XfLUz(L>3o+Y_cnhIo-W;i<~SX+Nz(DyjwH?q*bHRy+?|C*Afxo z2Op{&>-8%d0tl2Sy!4iSTDl}$Zs=4h(r;>!P8{LhV0d0-S9m@U9l{6Olzi%RshZ|5 zX;?yQ`{llbKhw=(B4|sybpzto zILs?`panXsH_TXc%V?C79@D;`{Gp59{?}CxJ;?_1k)2}mT?T>m!DhJz?CI2{zh!8% zN7%lh?6}5|y340e( zt7(f*A`tQE%dEz#M?)3$pBok(jnL^nrf1;`{T*Dn48h~>npf3H`&UxPjEr@E3ut!D zYF0loyi&$xR=)iG>$rZZZs{BH@KN=xOrMnGwl+lYOo)QKs79L1Mwy>|tH!jMoO&&2 zXnes+$%@*H>c9Y#elw))4>A+*oiy{$-fnjPuA@huaX}*g;;Qf8wdAdoX@5^TdIYCL zy$AP{mV1T;Ydhn$>$kM1b z!S5A}Z)bd;SS?4Sws)+bgu2GCh_{>`%=$KD3wVSwwyK!^OAkosK>u7ySPB{NU|>Kc ze{ZiPW(EBh!xfOVH@4`JgY?~#b2z%zV|;20+p%L)j;c;Al7|D#g~`6N%gK`b`-@Fv z^h&1B?cuQ#C+Z#=$k%`w-#Y22+Q6HRjp*qROw-U|XJsNr$jsd7X$zJW$6=Ff`IHPc zF)`o4Rpwl}YSw}On){rl2e&)~JK=?BTN$~^J-MB+PBQ@^;?KRClH(OnfA3wUy=y}d zN7Afby`z#8=^thrWTbB^GIjh$){01WvAn{5NC-Md?%(o#SXktfYwcKghdV{Jn!E@U z#5Fdl8Dg0?YkO(DjSEW|e&SSLCcB^1n1oS^7Rn3CJ&NgHc4)R&r&Z4RsN0d@S(0Vc zI%$GHPb?%l6M)^7tOyNSY7J2sud?$^P$5sN>nXa$3^k~?BmnRIe2xi02Wq`>($K63 z()6Oe9`c-O5zM;C*jE{9t8V(t%8>AbrmC;0uFIXGudHq`Up`M1YA@@{LkubkL}D=+ z>`x3ej6b>?n%6tOZY+!V2s&X-PV}jejT>mf9hPYb`nc1>$GX#viA5^Y%0LuiwJ})= zw>W#o!lvQI)zjmjDf{LfTC&hK8@2O2^MD9QGs?0!#>gn!UG#WYe+-+%qz`rM;Za=1=&sm=Ofg3pC{1`OgsZq zL(Y?6+7cZ1E-W_Q@3!ZZ zEy|>{?WyAZw0tdECfw$f#<@Wl(#Rq-&6Ymzb=Ki%46`RpPi%;of^5UpZroy*vwS!8 zN`pIHk_Q3cT$6aXke^anxP7+>u+$*Ctpj?hafi2ol{)Y{U$7B{y9)l6`w$dgD=Tr#pS*qTn1Z&%1m(VFOs|o47ETr~TW63k zU`COZ_Em-$Xh;Z)ETnDs1JuWc!u~vqkEPn7VlP9EwcC(TnY~dSE|;GB`Na{r0Fh% z)aN@)UZfQ|V@T6R*x#fj2{D2`of)V5L{w~-`~WpR4zE2KV+JOs{<_mW3^Y;O>nCCn z>jk)nTYLV5j40f+JFK1qcZQZ(9ZtX)O`qVh#gk_iX4#sdiJU{ogfhOFPHKK z=+J6iB7X`z@v#(83md()fTN-c{X7;lb}*n8(?%v4jV%qdi`fW8faSR5?JLBY{>q0d zX^wjT#Lo1&EP*|7x%X2Gcv{ato=U=0(~x^zm7VKNaO9C{ zfHpZlhxl)CRkZExH@Y~cY9^I4SR*&_yp0QFh0Eh+rlEv z?ZdSYWZu%Zq{9qJoIsReWU=Olqf|tS8ORSF2xi+ufDP@MJ5x@SVx{~+2vPkYo4@u@SnsZ2iNV7@X_oLf#$$O?(D$KZ=W`@l!CAX&NiS=)S6`FY2H+GOVPw@x zeCL3GHNP*aazxn_m#!1aT?4euMdwT3IyN^R`|U(>J75|6r}37yv9hmkDtdiEdC=w{;JkY{))>=IM7)pYz!AXJo9qjPZY&0q6j3H ziN6hc1UC>dI$oqFgvY`do090N9#203YRrU;6y%?RL08|yxQ6Hl`#9;*RrigErg><= z3iW9v^YCV~a&7>)m<$27@Z_CF#Cn4k`npE~0|(`r`?|B->JR+Ah)=k*hX**ff{)UDzY=VDnHVV^v+Uc=_08IOUEId5HOn)?+UnMA!G; z()V~O_~|}#K97UBLRO#9@`f*<)SA;M{a9rd4NK^0E*)E|NE?j}vA>ePwjs#3q-8y} zUGeA%ew%Ay`ws2KE_)usPqsD?FaG9NS-3$tHRT*XSR;;ejTZ?TC6|9APBXk&pdVHCPoF|>ikXO_E^-igVSL&br zT)rdZ>8C}$LgqFz=`(etQhvc$$6n-inY)GxPDa#)Y$t}ufu8W{!#ECNJmze&f-x}I z$E4dK5<^%jQ zAl>ih#O+_QEc)G>zs$=}C)%O#OE?X6GGAVm6FI`g-n{}?joyDfQscP}YHj+m%FD#* z``)Mau7`W`;KBop2(72IY}iPhan>J#XT81w0DNw$_<6gBu z1FNTL#komfsK6jtptBmXHm;j(Ub{y_+V!M~c3?;b^Kfu=IVDL({w~_bZaO*Gqy084 zySTo6uS!!YIoLZsWMOB0`unj9x-K55@Vs#`l{a6$fiy|?)b0!Z$FC{0T9>E+Jt>xU_Oq#`6 
z@pV>Mk1~%e{9>cPnT{1roLs+}tmyHlu3y%ZKzjL7zOGY)94st&5hz&dw~e-}U0)b( zD9rwdDYCc(A*O~E^enP6+cz9ge}243aP?3dG-S_Tz4IqvC7u3=? zbiPxD&Q2}5u(;)!+r8H5=qR_cY`~0-N3*kGm+(iBbeXN1nj`8>{VF#&|KubS(8I4$ zqGGDyJQLHY6@|Fy%p@K>*VROMk8+WjD>&}_!@wh1hYTALRN3iVp*F)iF zY2N%jUyx?cpE+xPm8KGAwU|)x;fJfks%aRp_PG#CQ ziSp?yr)f3-(_HAN!$|l>y`z|^h=NM#zk$EF$*%$^1Ir`&yseKhO1c%8xV~}S3`Xyd z!T2eCWbLH)PK#1r6TF)l4S;U1iROwXu1-B~)(XS14AHGc-UVv$Jj0xCP7ttq@1;FQSU0IK(Gmu?;@THT}X2MjkXhD+Hcc+tnvYtVX0XI1CNUAXJ!oz$XZWD==vO~tK7G5B9w{D>wDOJ+03WGy%5vSdZpJ_UQ~d#nu)%(=?S`pr zWtm5Qp+T%+kb`gC{*suWYJW?SEoZyaLvP!dT?`V^N|)FlYn-33v^fi^F-Tm=c&;LcxlWD3sBKE;h1a1uVCGc+|;_2f%f@pCWNY8d&)9 zLG=FVuqlqnDb$NRvf*!>lCHH~xN`9cS?g824IuCBR_ z$pPSClhkiWj3RW1;yfY9c3WAlB(8yqsa8;1&hpw@}ECX~AIxydj z0^pG{W?*Y4K1T=`#d?I}2{?>Re|(f`!2X6!?A>n?yo$T_5R9c%BG;?kqEz%bF=~VJ(9zJE}PalaiSJ!2_{~2$y6D7?2-eLa<*65JH z;h%b()6=Uzt1=!cCsI-rm@^wn?;U(7?J&M-5@f~k4?R1hS1MUvb1@tk9E(eYtVgi| zjnWXczP$Ri1gIU1?JfHD&ZFPnftme`h%<__`oq&UQ`*MibI4vDuyi|{LME{Cg^A)( zk;hTvsEzbOk$z|0QJciQo0*Be5##1^zcU?u9xFF)vPz`|VBxm#Et?J`vW^)|8>cg+ z?^zQ~1@Z9PV=Dg=*HY_Qdhc{QX?tL4vB!+1LY~(;9B^2-<89$W&FoFiqU`eJoB}ML z;hM`59^Z}VGr4j3OVXY5QRJhBwB98d)n#Ck*VY`NumgF#F=&$U?J zF|H_6^&>hfn9ouW6y`QEPGq98R+}4dno4|oT~F>N*nR&OegCUyaZ8E?ixJuAQPk=v zQzBx2vJdBwr|~VTPE#r%ct~@xyxuTtPL<7_o|%?BSpNC*?5UAlR$}_Bb{qgeX__~* zu48yhoMa77)6VEF?GxAYd=qK=63z}=fJ}Z5V+JoW?_{Dhm!mha7A{@Ck1&DP#7oK9 zg*FmF%1AIpj6hee$2t{9^_$ZQZZ^%96cQSRtI}753A^`$-iypQ@h;&RLx?*4!B$^>gA`x!>t` zh=3HNN$G>v{C4x)Yn6@Ef#DV_-u$Cty>{A=9CHPEi-1IwHJ!Vq@o<~N6F;|LXP~J$ z9$XY|m(Ti?7(quh#RG;snN6gR{`4AjTW_s3Pa4pO(Xi8zw0~OX;wOqO~w5vXK(K>+|uC(V(Sb01(1PmA)2<;8>{{$#IUps$juVihb*wWVs>HhO2L&X#YsTiQ_FDLdKFK36=&@i8WYz{Tb*@P~K2W)sj z`Ff50B*>)A#~%e(ky}BYry{cp4PLAEIp_0R)kSY<83@UTx`0}so0!QuQ)enZjx6pk z0$k&#KvL|7JzQJt6I^W`KgiXOHuF5&Ch@`L`N|{d0qTenxGD(SyX?p~VT5V7f262} zbf-#?F?!_<8D|sH!)E8l!vKEB4mMh_zDsLxXEP+Z15+CxQ@`Qt#-qy1CK>|1Nxi1T zZ8T4|J@J)iz!}c3^WbJ-mh;c~sa!ZxY>ZL{EzTD7_l|<%W{;+)x*cZX42{157pAII zu_apvz+eP|8nT7R0w0ZNckY+!8*FWNAijXbk~*INa<2{d)C3j{h?#pW;_#h?K-!=8 z1o^Uvu=A#=hD);ngRDNT-}&>%;HXlvtWNmeH>^z>?Ms!t`9+=Rf$=J>yex@)0fV;! zd0_jIc{Aj^eR)f*W1hUYJ#U$y{szr_^=bQHpUWXH*9NVNPv3~(Wr^8W9P86>t!A}+ zoodwgTUek)0uXFG)zc_1DfjRuSM2dZrY>kPX<;hr?MRe;(bndCSUhQB z&vc>&`OF+O?J2aAHfRYCbcNCVTGpjB_1n^rj?-L#V$QYf33B+V0i}o6W^OBa(X8xR zWPz{*;qB)Y4A(3xpf}#bMT7`ost%3lJD*h*zH(K*ixx2KUR27hha^h$`Zi|Lr_7hv zhT|rKQaYc_KC;g!BW(+3v%2x;It3o#`Z4Gkt}at=q#~*lzxR<*D2u zzjI2YKD<~t+Q3v-eSyCwCIg(e$#MSI>DbB(-NglrfNFbZRr=)ZUy`;`zUTDZ(Bw2< zHr&FsZ9|~M63$D4Pbm~@c?hHXP?@=2=N;zeQBYe`Z<9&H=l*ORzx{YYGb%W{0d@P? 
zuP_ph9~&KpG~P2}GQ{$NKEMh(i_5mL?kAn%0g|&E^;(9Px50bk)8kh!Q}13-48Uuy zlcdk+hgFOq-czO=xkIDq!rCki${sgpMk5r!K z$BuHVWYc9$B((2###E!d0)MO2MTPi(lP62F0D>z@b;_?X1T;>l?*IeAQIwaclwk?U z<}E&KU*z3NE;3B%VqD1&l7Zu|5-p!dcHeW$khTAkdbHu{!G4gcsn+>jO#WDOR0LUe zWn$xHuvAtZmr+Pq`_sv>&+#)7FYS-vO|9%(Tesc$(4Z9iv9{} zm>Q-DVaq!a?vTdPxv{1i9()lo5MGo-2l!F$#TnS8A=pSs2v0XeYE(`YCJdJ$uUee1 z(E~QTK1~Z5cONegN{8XE>2C7cXb$Ky=(`J(ZkK z?!!WG>_*Cq&ks{TKykFjuaYl6>IT_)v>(YomfF&eX1e=&Zg%wl zm_4Smji#|%DyM2f(xKSgC=UBplQK&DtbZ!C@86`AUOly5nkzXor`6N;%m&>Bd5z-s=!g?CFVhI`*!VQF#F79@hd%cRHE z(^NLm6nt-MaulC}Ri~LtXU>JDAMB-v*i7<^3&L5Mdcq%s7_DFp*=PwX1|3tU)DYI1 zN^3V)ojD=On3E4va%S}E4;4qL#Pjv`==6x{si>caj&y`i@zGAb^q9Jyg_9jyc7QS9 z(d?W6Dt`!8oPv-za2ho&hIVQ}10i1ziysuLbk97NG>Jgx2@UP<;Gk5`@Nh!7Eq8tJlcab3DLU<4JdV ztA2oY*=XJ4lWNsAe9QqPw)2iJw)jN?05cYw#xTc2MEyM;9r(yc$Z4p%djzOXV$v}Z3(NG&WH96$o6kV zF-k~|-+aF{^0XukVSf)dm1AGkani|$l(@OFRH7#c{iJJ&nrz4^+!|kWrrLXpiy|TW zZzUw=&ig1?`NVpjF7B^<=kl$9?j@d*ZsE_7ei>6c#i?fPR8zn?knuH}JWiu&aKvxN z^hqmUyV)EPQ#&TuE@XQ`z8wCOMeg4#Nd!(mUVrLe6YF{mXEl0;DnKo{aZfbA>*`Hh z3F5a7AaM_EL{mt|Xv%|dtUIwh&B$SNtK6%@@fjgv{lVJB3#gT>X%mxj4n$zNQ+azu zsEIwCLxf#6@?X&}KiF*RuJ%Si)drS@o_7P1jh_9+#7~xk)Y7loF`8RL*&ah%>dwY~ z_nPa0mLe7`=kdYqvc(z6R>@_4vs_NPsIJ{#go~kF zTu0?ueaGAO*hjryKH)gFheDvAH&W_T{Z~_kDdz84nd(MbGlrCjfUr^j7hUiD&SoF~ zf9s{aI#sKswzjBE?afs!Drurt5UZ$>*dzAnW!J0_TC+x^M5IORMD3EIgs9ki?_J~i z=KkEr{XIUv-2Xz(9OwCdzh2Mh{CA%Z>SW?1F<=P_7VJ!H>3^b~z zjR_rF3Ee<6>JfbRX@5|OQP@By_%1MUp_w?6!`M0ZPLo^y3(a8h#8=NPtd>emB7vlW zj2Z@3f<+{h^DSVN%M)oUE@;us)pvirv^@J9es`^lJ9qp9_x)n0CM#7~i`!X$aWgyxci5jMlNyeAA|9&nN&)s?GFb+EJ7UaVT7#nUbIwmJKd~ z50CBsRO^HET{_>=D~%=Hw&aC5*_)h~xNY5D z+evfVyAcVW*nfY1?tELr>e;qU576Sw(7P&VR3ERa+z;9k8Eu}S4XiHG6&6E=Q`!L& zW4fE2*^4y;F`4IUVF&4v&VSoXlx>7&*L?WA@Fe|b0YCU^CL-jE=290(8S-{1Ja*C# zez?_;BHHNk33J0reAOhWg1ZuRk`+Jjg)S@^cY=*-YgR$ZVAlIEUuFF}0XN%7LX%SB zVZFXTC<)oD78XB!PmM3a?8RFAhvm_N?|KPk?;@HtX7RUNlyxSr(rU5q<@_Y zI-iG2dwIM&(@$0TJ;{qZCqxz3C`ZA?MU+AZpu(s_U=Z8Oi zlEoACo&gCuIh308`BD==F9vaMM^a+@d3;?svyDRcs%-!z*7GaE#Zmc|P)G@CIL{wN z&gP29k%Ae=x7!9*M+XZEpL0?+r7b_b8Vn@|pC&-0h564R{}wR~T0U**XwNofxBJO( z^A8d%(w2tQykFW1C0Bnli=_u^!sh*}pw>%swV={^Us4*gcjWcSxn;iqkPImm-kq8_;;XU< z|0{`tm0(>m%1|Ez&-cN(d#Ca@DjfRd#`MQbj!;SjkBMhC1X6!iK;x`V!c4nG@OzDs zpeEY#;ay=ftAF3xI-X{***$weE6AA3csi;vYcbe1U1scoQogj#y()I%leltGf8W+4vp#+)mjOzL&AMDZZK(J{_fcE(JXp7!Z@p64{2gwGNu+I z(v{$vSi z-Ir*&AOw@|)d*!7(N?S|>&D5|_W?e!ys3Yu4_e2F$+7tz|IWbJ}FHV7E*Z?9IJIORi zW#(`xp`VqPAQT3bUI~P{x#lL8Zy*%{D$?;{v`e||8r(esl@&{hn>h=YXaZNtv`o0A;Cllfh_{%5y3hBPN^FBHJkLy;=)9;~$ zW0$4KcHf4-CDj913-klDPsJN=PyNnWN5|z`ztpY|?=P#ZKd5`fbW6arDsj1|gfwVi z;v=rrgh1=na;r%NoI;TcKJV6>r?9y*m6d zz0`Y4%@&G!rAxeZn+bUD)9mcU1Rw!fSiH5>pM5hn<*ca#%v9*o&$PnXLFml)v3K*~ zP3Leg_mu$fXo3ceC2cNP{$q$MU>VUj`Qehp{EyyZnxJEb-RAJFn)ZhUZe5*ppAb>3 z%Hf}6r-U}Jc{Mszux<2JI1R4aNa=9Am(2U8Ohj8GR6#$X(?J%;%|suWF88y3c4E|SkvZ9TmZgAvCqtCGCR&MzK8yI#ATMwNdy3p+0o`5Ey<9Kq{T6M5$8nDc@h zyL%9`@gCo8Fa!%OpIy54|FAAc6ZFo}2r)csfY?joHeUjlXZ9@FqRZZ`v@iPZ1LN(5 zCO<#Ua)UBmZe9Iku2bg(0qDAqtm=hyyE5BzRHr^iLd-pRC{U98cu8I(%<0x73RXJ& zWqz>Kl(WdyIZMPrh1mv;hxwO`wJ>B=Jd3{)E^P4nCdLa8< zo`XISpAI89TvdZSxWX}*^9f^pjBUBY1JojAcjaAwPBM3!+mk+v`*F_25%VJ;tL*z?9Uw%c zok~zaIIZvIw1)Ct3g=QbvpcQ&JEu`ib>?z0oZCw7vF<*N*c zq*}(@3H#8B#2Y`%awHMX8YU1XlakGo0^HUa8IncF+f-G;nV)3d^(=AuFjA_|6^nHy z!UYB99ymQPP=&e3D+O02fO-2AoQj?y2wej999PxggPMy6@$M(|%Ird-*1Ua&x-fN-|ff(!5_1Im;8-@8x;6ZApRZmj(2*)nXX-uSCBx0!>(}HcntBd&f{K4j5du zvY^m4rADPS-;qWsIg&6C6pSe^%+e{o*>bzd_x2@`sYeZ!Z{~ff8B@EJD&mSaoO({3 z^FtLEDO=E$`^A9=f@NM|hjRFv_`u72)ell&;SAm{ChiJl*)w=~BXp zv&tG=T?1!Ypr=gHI6+5GW2FB`hADB7w0W^h@%@j%g80U{iy>8KJkR@{fC}%`&7HKN 
zx6?^oDfyR2tQ07UH8=tx{E>`^LG9ec?qZ;vP!8R_6yh-jpCQ}n3_^PnY3Em91)wG~d zb=Op4$@m0pP@)MndvSyyXKVp zh&&8i^~l-19E%H; zQI55649(*Q%reeuJfD~1!L!o_H>x#Rj>o>syf3KBnr*R=0Xj9fK&+LCl#jXDUuCva zBhwml3H!?K_IaKM+NM|DmvWjm%{=JWayi|HTGkF*Dbcn}-}YzqdV|?#8{Bi@a?7){ zqepQBl^wsJnQz|=mVhUs?&Vn!cN@^L>YQ~nUiUtk)-F88 z_`90>^v1dWqa7~B)BF+* z$#vGjiFBfsv7MgCbZBT+^Fc|*w-W-uM7Ym*wbgiuZB7^@FF;+vI5y)pyrGr<+gty? zEcTlJ4g9~17O7c&U*aedPm?yb$v;=r>Dt_{3>fA81Su7!_h+$gHhb!$|{*&tnz5?E?p)b$^O zL-Tuu3you^A{wzJ)9{zvgaY&A2cd>v6>fW{Dbu^)M0N9hjWw`uNOKWG;OY>3^EZNt!;`aumI zGdcw8{mdykIC$!g<%`qL)Pv)ovcE28xu-tfsIuA5CIru>Lc_2b(@)36o^|FF#P_dt zP}=PZU<2Q^0E4YFaMx8i8a_@hSQqZQp$$NUbM)@HnzEZ(_Ea#(EE^{-+b#tm=$E9- z`uGmr*(8DURd$#ZB5W7E`lXXgr2>yW_ITMiMBtbi+|QMnz~s&9Bb98mufj`PkXr4! zEK+@&tC2o;pw70~IWM{8fy5p5tOI3xo&{y1a`vf+z=DXPbMa*9%YZ~MA7e}|pfZ_{ zDK;{Ftnzh)sX-NZFKtj%W6w<>+xd)i+_>}Hd zP4z}~bDlLxO{mLT6Q}OaQVo7=!xc=V>tY|o#ogd@%cc_h8 zhmUqo-i=I|)&ai;OWIJ@8l-Syba%3+Tf-xt^e|vd``CV_hAOYQpRhuEDd#3=WuI+C zG=VVsOYxLnY0n%H2-|%6C{C>}zHUd6#a8APXS)f(QrmcC`fI9x>HBi(dogP{yA4qS zC;Uz$s8PqNNvQSrrsz)+HkQaO&&n~kgG{N9{!Am*5aHPdXuf3k-SkHsug(^S5xGvC zDhpfh&vd;Gv8PLWe{W@6d!(OnGdQ5Zf@UGKIUVPIVoE=Usn#xXNLt19dJ%liR>P1Zq^I1&5YN!^*W^jDfuY>4ID>Cdf>w)y{;%W2crNiHgkP-@lg|K#6y|+#KG% zXKD)Mx_9I?C&m9ZbYr*s`-dN|Ky5+8^M$C5!XxNB&_WlDwCr7TC@(A@Z~D}8ClPCs zuMS897ccl57Ei`+n!4LKs;(6di+UY2GleMZWYXR~Z zOZpVhA-O0(^pN#emuS<(Qbt5Y)4ouRRr#nryPHr|U+3Q~=hmB%5}>2yk~m zM|yxvYSC2nR_fC?SXN&5YB}x7{W8ZiJ2|-?eslBeT>PjK3I)y_4SJZX`i`LSNBdTY z5#MpGG5yP)!Apxm71D{k&65%oe{qpNTa?W8sU^1{5BDT1AK)^*wnIJg8=zM&DI+do z%6GOdUik4Zt1!rDj{ftK%mN?s+pVlQTtB6+WCf<4bEvwK4v{eR6NHug2nx2NF!Zr^ zE79}%H2(7sk(NH)em;gb3R@`JR9?1xl?U!WhK*4R2(8LHX2X#}!OJ!@1DC0#s@Cf1 zZ?j&w`ptVcB~9ok&jn~h7>DyFIs+$vmqGk!6Zfi`EuKtdL(N4~ccJ4kv_=c7DN~FE z$rx8VRwOVs82q*NLrZ;;$)!pwllvW#I_&o@-C(cKS&Yl%U=fzE20LX2BSfpSP14;| zOlzyHHjJ#pl5{Mqz+-ZK>ul0Sa9Nkk?1>!va7QW?#a6A~^q89D8eo&CcGrJ~{OQbM z`wt7XVvvqvZmGdPzOE~!Q8aV>?W6mg+u%6t3#+5(E^YQ7+x#81xtN_m!JtngOau7> zPAc^opljyVEW}sfOCnMZamiJ=*J$!Q3zsqen#&qobGNSEw-w&58*fmeX_6)ME>ZEuPcCiHc1q*HtDw8byo-+E54stIy;Yms=XRfk#Y7R8QRLvr zm)u501h;zlKihQqg?K(_=SfP*nx8lWa>}G8{!p%O8q*&@e)h1xr^ebcUZ15@0Po2B zAmRIv$=84^y87nGLCd>%!Nu*0;LprG>SA)HC+ftWoY3}W?;0RkmFYPvaO3%1Fe{P* z9P;u*+U#z|M^byS7%9e`tKY8>%q2DWTq;+z^$^?r@1aX(zbtDttkA3Eb3)+*E*9ZO z`3CRr>VCSUWfIoL*ooLrc6nxJUpw-&PtFUV2^)%x61Fq8kwpRGQS;}c3jObY20XZ_ z5%ftr&{oLh+~{t<%}r&XNzd$g4M#IVD_2p~yK^VrdvRNn6P9Lf4xC9z2xE?5QaI)^ zTN?Ta#m7j^Kh(Jg+_^8^#Y*_zQ*L0bivcHIeyzn7=uu7f`l$AnNCG8!e<|vP#PXx9 zbdJKNuB)EDIPy*`$?p;o(~>?`>%a2Jp^U!v=R5?!<-9zzas?&xy6&+);1F~cR2m!^ zkN5J)Au%0ePX%pO7T4OtuQ_BIR<@BfBP%dE!#K*NkIqLdVOx-hrva58(UVl|sH&^l ze_ly?uqsj7X-OcKvdmMtj0Qa-OvEo!UenK}DZVQ69$Iv0>WqX5#^R@1EXq=LBG{0k4rL&Y}6a__jVNy#X+d^91yR+h%pb@!R;>O?I#uG%z9fFP@s+zIQD)iiSC%IlDi@ z*eb*>PtSfu@ew-T(8sSY);QJuijhl#SIIA_rEoCbEp2G-#{jS{6+txVOsSrj$jIdQ|N^j z*eIq5={Choq8A}S2L2f8X+4l|MCRvrR8KddX(pmg^o4jv-Y7}9p-K>tN;ARK*xlQ zV9s{~_xmY47OGaong0ELr!Kg49D3u({AGBmRZn~Q&z=I~Mpd5pvdz5Z-}Qm@!=+g=UKzEaI8sz z)Tq>G)uBMBhyK836R@CrIwgj>oCedlTgB`!%Y3h(jVsCgYbK{-zHfN2m)a(+rjSd9 z>#b1u1~$ziDmuPIU=nmdA7M2@C@RaH4)zv*2z3@Dy>}Mxi zB>r@MTVJe{pCktG-Mjs;z0R&O2ML@wR1UF`=XRG#%2hF5X*<=tN_H-*wBB038l%Qn z7+?YTHYk^?Kz>D$`)xR;PZ58UOs8AgGs3l5L`z-II)2N_B7a)FmpGSA_uOO_rytsi z02j{u|dtn#Igi+jM|^T#($IutHA>^PgArrSUXI7)`B8~;jx zZd=G+d}(>BHwHO>C(XpCsYfP;Hzc1`oD@&6?7BzGytqK6)lOCTeVeDW$S19cj?A=#Ru!(EcazP z#JYr9=#S+7!}$}ZPjkuz;>L3v@}mCpC}NNIalxseT7P7SOiK^9I;gw{Ed;K;GdBk& z1tv7PDLL%hZ#qEyQu3w_Yw_Wtl_RwoRpUV%9QbkXk?M%KAItN*9 zhBn&3xb=kXf=*n%dyE|0x4Gu?M~l|=_)ipdWbUGO$?p% z=uk>X@o`;6*y`XcB{_?W7l(CrOe!WOpd(;KkkB>Nz@CevvUdy&w=R8W{LCAP(Z?*< 
z6bm5r!~_e0ZyP0Q_D@qH|L?2n*#8iB-gt38>?b5`8TnGpV+Rmy^dx(liE~w2A{&6A z6FNnQw0hb8#i%lox!+4-{5~fDg)aXfQB9F{dm?Q}7P=D~CU8>HP$#C&*5@XC=;QZ( z$=*kl{bhH(c8`i`v)}EJ>U{HXFAb;0be5y=T$_ad7&u507nD4D6g{eIHm_Om=N&cr z&(o27s?QYQIGn6qkyMg_(Bo8-=tFxu4tel-N}7LSnb4v8MD>v&$Rdp^=C25-wjkf< z8&whdsJLo+cV>xW-b~BWN%=BI4z7Xw;^44VYN@golIPPlv;v0@)^ ztzvc;22PWe8Q7>|8EA8YieYrgpQm)_MsR_pns`X*n# zFmr_%RI*v&yKh>54>4xATk1SHN}os%W5=b1PRWE4i1S;8m1^>ws8ydKcOU8mKdam4 ziphjBh}FdC>bAvmctO5P_xQF}-rtmeKbhH|n_V(3u#a%*E;U40Z|H1T;H@B}Re!=! zumz-V*(gcX2R;_mtj$+!KFWPrs#&F$J`>W%% z{nb>{*f!(x4Y10S?DXnHKQMI?G6J)4xodOx#$BSW-xm2w`NHA|c*;RcFjR_9l&bdp zi_434i}ehEF|I+;_WNG;g3J;ZX@;@z(L=VQr^f(xkD&1b&WR4F&CIa(iQ7^zhMA4@ zq~h_4B-Fq8F^O28Z-uC#4s|Erxg_xco02Vly&v>%=DRaqzbgBy$Sb~p;@p$ql`2#B z-tG#MLVYx9bHjyvg_Y|>%+GYiBf+?Vwo0?!l>Vq3j%00(7I4~}2~f6gy|7fH8%);7c#o$g_oSE=4H>%+`5-k*L~u@ z-*@RXv(Yb+Yh%7+RzZy(4ivm;=zgWQPg>Y2NX%Su7;BN;oY8#DRAHLhx&h%3Oxb)V z(RZoab$*Vr$w&!ffEu{<=({*?c!1;pTfnulVck!{i!{Sp>q#ql>R~YUq_JewEaQ!0 zo{AeIkGj6jO5~W@+qs)`uSqlNPYA(pfj~VnTzR|1ws+(aX4Cf{gF0_EZByasbKc71 zTe8F<`;?g_22D`A)1`l|HmjMm-Ug|g(1KP9Ev78CNSQ<&Uhc8cLQ+Kza#R6nGg-5t z>Y88natZH2efky&LRbD3g8qHG{mm8YKx=Phjy}^;oj<}DvfBmM1#Z;EO@6XdbjxpZ zf;fUBRGU;=sPCVYc#bp>7rzC`<=&L-E0Y&1Blb2CgDVD*BAorATE} zakrwBE`n-y=s+XuT7be+jO2&wiZazOsk17J4-l2<_F1%vl6J6)ssG4iHqdcIRv+}H zJ}+-Cl4`ZT>@7j>0Tr)j2CMqW?Sn19trm&B*cbQ*$hKCmC|7qawv0ouls39}S6p9G z`p`;6{JL0wdJSQHq@jqIw_)Ch_6SCODcLZ@4lU0^s)ogUgo;gaXNzf7E@0y=sfFTf zOR846ZT>^{A4omHaN+Mu460S1qRSH*cIPuUpxC85Eey(LZao0e6*pGapNkna@em)N z#g|i+8yEXajy;trauvh*ve}jMeuOPacXRbLB+B&Ap09F0?&LGGH1AW#u_(QwaSfNK zWbXl3LDx@&gpMA5T>9zv>z6FE5)lhQV_3tj5S~AbCz;d_gN)~rTqvDvKJx!a0nzVBd&I0ywWTk#Yiu&Xw@m2o~GmcL6f~ue_Yw{ zhG7UjCVI1f0q?cSR$}aJ5qSNYxqPXb>GW@|ANAY* zi3qNkk&S6BMQDJ3N1~)ub-(!EHw&scqp>nwS7~y~nPUClCndc6nIPy4N8>Wb`cMvU z@|6S=`vV|M)84S$`f*fxmT9LB`hKQ_%Ik;)jidK69?WX=&M?>`)>5_|RV2hK@22dT%u7N%>* zgl7BB6F;)#CpZ%5r2f&Bc|%=h@z7iuCiU=91btEB>Ce7h%W9PP_2Y6EpYWauS=uL2 z+N-7luu*QzI+4rk40G4sDmuwb3{jqSXO8c*^Bk%Yk3nk#ueWo&KqvITtpEd9`rY8W z-IFZuf|MlD?)CZgP>JdO2rsBW&{D#tc9W6j$5-ovmXm<9%74l@gG+Oe9JZIjY$p5K zwn8CNit+9-w|QyRCO9sg`v;T7xdmVm-wfh?d;XcyX(xy@Eoj1SAX>! zv4i3_u#)eiwYEZ+3u$?xh0EiCT%%%^n`R$hpLgC!pLv24(HZP8i5~3VU(%tgMv~=(@Nc$J%{^XX?ME&m$fPuC?cC z3#}Jj8=0@B@-YkPU%%oM^s)T87s?lPf)=3^*C5L1%5jt=7+RISJh?w-bB|TcLC=by zdovo~S-5a0(#a}X^B=>+jELnVfs#jCTbH)5F`u$58z+5ox)04ymcO=>Pq0b&l&`-{ z%$DLe-np_~)y1v>LN}%Z_TaO0{&cNR5hPk@Jwvp5DW+SvXC+uv5+xt^bM48-fh7qU zHPr=y>5*w|4tvq2&)V)jziS|hzp8bA$NV5*6AMX#=0kdfyTgB4J3&oYnyb({VClzo zLvmwWV#EI5td!?9X>3qyyqCvkpv=9xfUd6c<}qGrB9Fj;n)0Na`fs|IyP3v#Io>c~zBz&xyc%*oB z^WxP@TedS$l^*%EbARqFe|8-`-jnI6(J9@^#*}uxI|m{mfwNj<5#ZEQXEtx_N!&xq zg1-wj`rAVP7rA%#N3XRo4k2t_UB^B9lUE-VaHmPddx2A=M&R05MU@>r14GcHoT2;0 zxt_ZIpo}KdQ+6o!=`8n)sAj_-%exmtah%F&ve){K8%+seZ49|w1a+}cwCHTWt` zNwl^s4mn6XQrnY^{fbFRbO?uioDl>*P2^rM%uv?Jy%xgvWrcdA@nlF22kAVW{*R#p zr(w)llUX{dUXxpy8f!6F!PR@?g=tk>@xK9I4{S~I0?hkZ-(=xq z@E{5fY_OQ}M2K0Qqhx_YxECLsnZ+u|!T+b~Y9baW&}#pcy<8|RkI!y}e5%RwZ+qGC z&nsdVhj~}IPs$8(#V(G-e+qYK_ehp-rsaqN>jyL*uTJuj2-`;~)r%C5C+8EWUupdm zgo#ldy~bW}*+24T)9;^Ot=sFRT)la#!}_qWu*E7H=F!~R1vdfO1tfi#uxP!rdy;?k zjjXFg$XwL71g@Bp`fp}o`VPt8^_FKoP6uLCnD)HIO+-vuhg<;UY4y2R!w-q?Krof@ z7drltKl)+(ebfG)n{8 z#dDm=RKoCnM+1U8U4=Cb-{%t%S3k%my0yH~x%&KQJLqu;u2;#bS+YCqHZ@r63`m^F z2?^R)bg2sSBkseyHym-~P;N9oSiPcI=!#eLbVabJ6T|5(r~Guqsd7OCJ=5<$y4rxF zQc2k^!M|s+Jl%Sy!h@|HLdVmZQRnJwHt9K!^Tx~)H*9xLBQunj@?Khg2oLCDeL%4? 
zBGJ12swVKnQCLxvx3`NaOt-;(<$!?nC*;@j2^6sYbcP}eo-=;%ey$+9i5t2*5UHTT zSj~U}h^*oo2F3O)sng$5PMIBE8%Ep_> zAGw8|SPbCsg?glns`U}WeNP$Nw0Fuarxnb~4{t?;lcYD9w-2?g6_7-lDFNqHatNUw zQb4T9%2dOK47}sTutNEI_8{=IiE5#M6L5YJziu2-{CtRx|GJT!uy5`MGfa_C8NupFnrwybicCz(xDNuz6K>Wti>E$jTy=DsaA_%F)Pn zIqQ#LbnD#XvYEvKsgn~B%pWfS2%)K05XaWj8of{YH!RzvGKZ!2GsKvA)bsewg|VZ* za9ChD)LmWfTVr7WzGL$5)LZ3SwvCI(E5W(yBM%*e?N7;$wn(V@XI@XQn1kJ)OUgf- zf=u2nPi~531q4KqNTWqWKYz&!4_?D zkdIX;0CK&Z#Ln>_%Va?+I@dfzeZ;fj#)uL?7uJN3tg0JrgNrvb@T0hHB|&q@4kL3W|F?J7ePmm!?4t^(4L7Y&DWe4|^Y zl$Uq>HN3s>k1Fm}Yz#|+v26wa1IYgNN9X?-LF>sV2{|$ci83>FiT2Z7 zZsUnIUwtzb)2fX-j*|3}jx&ReI#(b6&TP!wcNOHm{VXxxr z)p)h%1p&-$wAl9{&!tA!agRpt-${4L9xG)J&Fk#ti*3l=zPhc$vG{>~Agf`oY3nL- z8gBoMz=8&v>B8&c@qmCJY2uPfa<_ylamDIMit6JZ-_D+YwB4l_OL77DBb3dyXSIF# znkkY{aK(Sp@Q8Im_{Z7L9jExFXFI@-N*9$VBaqXR?^lp3+Ej=#bRGJj&6?HN%4C|43-8y4mE@EiY);ehAo`r&EfXl~%1nR>} zR&2d5Zg&@@J5-3`rK&q-mz;ZWPF%wskhFzLXql(`5Nx+YS+zb4dC|vLc?)W^av^t& zapLJLi4g1MZwqmx)Mis7mxAol&|}Qe~{$+a3G0DNtz(s?4%Fz(beG^G#hknrS*|Gcp_Qf@=eH> zfR$egx^>e&b(c(7D^kuD797e>vsarNC65|LtnQsiSon>4Q1J4DiMMq0Rr`&8{kzrO zrQ9MQeDwQ@b3Vc?$femvBY$xWu3U?qJo5KGQy~geub?Y%-U_xU*0BpSI^e5HMtPps z=lL?}7Air!d+$N$#+G@|lr91hsKZTivzQt1oA92paHD6dP^ZW^^1uC*2!ID+qjIn1!lvd3Cg4w zD^s|7JZq>RS9mZ+WL87t7=|0Qng}?ZIm|OPaN9dBYV|}}c-+y!e%jn$t_^(c63@s{ za3dA|#$8ZKL9RL0ONyDy9G`z@yUoVDG0zF5vmK+XT@h6MW<*^2E3`+|j>SJ6-YID_ zzEGxFlFn5_+B9U{qic-aT4=3O4oSv0?rLs!fnAs9v(8K8hF^-blzq(!B$f-5 ziKd<^q`TI;QP5ks6a7{mT`)L@f>u%$2C2AbgZ8Xhz=J8-Be?7rdGOkYa}oFPs>W%= zz5ae7et#kEzZ4cT#h}hxF4zzd-6K)N`*qmHw5T3XXd#gX9N4p(-0)GyZ>fezxSBLZ zP+k-A$gk-}qpG^ZHXoS(WmARByOpR>iKf<-2oGZ5G9MWAFZz#X$6CF|#9mZ=?LV&qRcC8+>VF=pGm_HToHb=Qn=gNliT=$r*{ zL7i&L_j|xw7d`VK`CKro9?iRFR>#USTXD5ME4(uFNCWfE6ES~G9lmEt>N7Yr za+x5eWqzk(5?f}&@X2U31qm@c9KS)W@yz>%W6R2{PC)P@dL3ygi$^IW?trctzb_5p z@5L1@06%>v0KHA;QU?EC{|lsXkQwvOgZ|38mzcQifp?da_LV}cM8Aghh4ftPJJ!RE zJkZs(7pW3+!mEk^R@7$c&;k{P;mioCA2YWw=|g@YjYD6~%>|%A%hRdDv9}Xduo!Yo zpYFt3#MA2%#FGW|My(7;KK$#SWkOKL*{7%i74HQk;Lyc*PIyfBK_QZmmw_aGYwpmY z8|5Q*WqDeey3UQUr}g(EkA`E`@4OV0c^b%z?T>5u)nR0et#G#qq7$GB1j|rQh|5Z& za&s;O1QAV%_$jR^+QAiVk)doF!^ke3jy58f4uZ_Ed z=8LynH$87S8NM(QcyQzP?*j`@iq2C>ySgY>o2>TN?bD*hui)L~SMhy~Vo&Z;J2wx&$yw>3Q7jQtpkk*Y@$;ItKzb!C&ID&(XKgs>pR<}71}luowTlD3zLXkCE8NM!Zvmq)p3M8p?!boR1&im{c) z`j-z`D&ljMDi3gy=H?FcCot8vnAcq;mga;sZQfG3U(R2nb+<#hwpdo2RJH}*1`V^( z-M}wQF+RVoI;QOh^in05&SocQ zWHmw*67Sp?wKWWse;{qI`(fb^6Mi*7^Za2LN4s@-nU)UM?*2i9v#HEs?F(aCLGT$z z8~!PHW%l>(>G@pa>2Tx@S)=EfU+A|X&7Hl#hI-#5o4(o@^laWN)H{g3K1thd4u3H; z#(Kvq>0m~w(}{mX#=Ut=CC_P93V5B*d$bxLP0Dik$zk*rA_6N*9I%NI)w=h(-!H4O zVO^hJ_U;@0G8?=K=nfbQ^@Y@L7i}H_W(0^2|MPON$bA+dAkeqdI3D<6B989@(qmuj zZTCFZgd&eIMrBk{1B2f@z^i8E#^lFLDV?)u%)^t8EF#6|_pj8WL-p*Cj&iVM^Y#V! z+lZlQz3c}jbF()9obKskD5ao^E8l_#xfl5>36#a)UqG zC!4@yMT5+J33S@~IPIhIZMBcT8m)Kmhw0bUzDlpQ)+f`nQ&TL`jh-fhxe=l&i_>gY zYCkOnP6s4ma+fogef%pjw`(M++{WrdiqeRb12yEBG$ z{|#5TZklG6H+V^*YSyf&T3Drfu-#G2R3YKQhk3i4J%?(M9#49c03~-v9aRmy{-RL8 z+oG;iON9p{5vZX~WZwvhwoy8axqS+6pHpn^zuBq&V|)3!OX+CXESD!+Xk?E9T8j7? 
zt%|W{W{kF$5VKa7B#T9#T#@yK6GrMYy5~@UJ-?qAb63CMt+q2)wrWmc$zHR@ob|;; z`K~#)F+^yib6qd*6s4ZiUrteNfA$jHgI^1sGO)nX@E$F09(~SsP3GVTme8edC)U4S z3j8@a1*}msw*jWLpB-$~rK06$PrI zg><3AO?|`n2GIi1cT+saq6*idCGWOrgke&f`@~tOu8yGJP2A8fi{Z#YQRR4XS>MTqkA!M=O#YR8qP-poH2Fwdklp z^>_(TP~{9f+Ui)=lYTCVQ*)xL$A;Ktt-6Jw)sV$1ZC&XdL$;LMl4oKpt%H)5%e?-W zm!0OXXtB})oZ2-DSEa-Qx-Alc+3v%I(&aGawa^V|*nC}mvde>nze-kmzg~WSAU)|C zo{=oK@k5)FP43eT+3T#pn~zX65K>~|L6nEsxVf(>)@z|5*II}NY_(%mW`k4P0u?nD zeEyaGD;*GIRPSi0{{OJ8FC@*&-Srl@Gom7QH?+`>9I5y5*CC1O6Hdg3U5?9hgRyL- zLvyh4MxUQD^9WaVFj$IUQkG;EhBxSwWJU%_!3MrpwMk9F)jvQRnJ47|#P#7~bp$z8 zYJ!aOw^Qje>>s9|rC@x5;8S|UiR_2{GgVX79xHlvg!jlQm*mvv|3ZaK3my=!@F;}F zn=@c09O*r#|4s(YMs+sxz3mx&v-;4)X${AXcXFoogX)U%hhSi5i9CN%z6-{_hpn_k*N8$(D7Z&VVh(SEAv{k=y*|MrjM@hj%YPPWM-zjD`S=GsJ>pNHKKnC z8=nqYdN+xJ5=y!22b>;~+CqBOp*NA_l!ZYX5aibex@Cz3tZGG8rDW8^T%VU!z$PuH z)f{qrAlUPA>iQ%iCSo`~AHbcUs!?OxJ%=IHWc^m`!u`v%AEC+Mf4!nInJ-?UF%FEy z#u%27iYX7bSG#j2QNpW4ls~2g6$l9+&i3U`;ll_sj^cHVZSq7Ym1<$suZrvG`%^z8O!+d?b-Z`|CX%}sQi zgX{6)L0h&>2vko)s&-<-P?FnoY_^eeW^-n`zy{My2{s;YgI9m&ilZtDP4fE}uLuWU z%);}fwDc?+^Rci!ziTYjEWd*zDKk^ShW7chx= zLE-2@sag%4E!Dc#6s&H`apAJ7(v`+f@ka%xXE7{E;1iWCHP1$0%9O$8apb||rmsLm zHZqK+)jSD0yiw$mYMYGUX}{X6&-cuxxX@d?-Qad=E>^!7buba3<9N1IlR)b;0p=Gq zt^(Eu(Y39sN44Xp3=)#q1NA;G;#@Y128Gq(VA)$xf$#0X;G^U`N{GE9))9{)o!v#Y z2LG4KFz43#CTN{uLe^r!e8$Kj0+rgGK`FJWxtICB2+=N*HE3_$W~N#Tb#R&9{^-sxP?)Owl*5o) zCKl&lX<^fh8nL%oDyP6Bw47`62Ii+}#Yx!KUY(h&ym~Ths@lkHD5=p!L+G`v-rJu{ zA2)}teq-_fzI(O!<5rkdu<_=~n&;V+T5H|0J<^h5ioDei9Gt9;i8rw~{cPj7j{0CX zL~;%ee6c>b2{JXyIGcW!d!teeAw_@{_rknjT$wRBSt`3VRlxv9N1QX%cWPv_ts{Tv zsL?oLnr7*~Q0V7Bv8;Xd`%iwakf@KzRz%)4%sjrHu;bXmnbg*ekyR`WCbcx}!}Wiq zTesxwk@~YU$5h@+FQDv1{;^1(dgJ|?=h&|G7xRmOsnnQwEoen9ZyoXv6Wc`8dPN1k z%GzX|a)c)CI#Vf{byFeXGB(4o4Pl&s1&~}j%xmoVvWT>A?nfzFInM}cYyAd95sZ3;1vNMU4TYk*?%4{ z$Nl57_XE`TFkQ;Cq4gQdv=f-2lsTTtk_or?!&J{`0!2&?BT;1J-SB>spb1(T3}1zU z?ZFWOVJ&Fkto}P2Pk9nw3b=Y&-w|d6V#DQBkjDUneD@rN`uIe2=p> ze7%J0A}b~UK4sQsXg()}5PkiX=!D7r#3ibBFGcR8#5r8 z;lK5$8T&vbNskR>_1rs6VNMGcnE{U!%BPYaOwjN5s_YF5n- z_eff|?F$XlazrT*-OxUWFh0`CNQnU2cbSt4^ zRvHnGgVmUaNc4Jd9=CE>e~uiE31I~oP@Dk)Ycm*E&Vs(MtjJ%E*@&qqG@l(xZ)>$F z=OVli{N>w;^koc_t9+4b>7$)Dcph|&dn0dW_kpE&9r|tF!E*W{JWT8EOk+9ip3Wbp zFv$wFu~BRY?`>GE$DsN?&_Xd{3hJ%60p}?9A$6g(TXeM}iJ>^ON5<1EBavDilfrsI zsW-kUMUWx$6A-Lcf_iZ8yg!%0L90J)EF4s;SIt()T-e0QI!+KaMa%J59YN)_K+3p( z;`(t#C_W)tre|r`Z7vjqi;>$b8by0TUB*Bj&N7=Lf9+hl4s&hm@M6BX`m`UFd=L6a zV$UEqs6NNuz)_=kSt~|n>Hz|5K6XMeHo=JPHMMzM1?%Xn6^AW4SJP&@3EAV?&qQ81 z#?D%M0oH-KTag?UnB6lCKqi3+_&-pfs?am8M3fuTJ zI*>zp>Y{b4mc%DLIKRQuWqxa3uE?qZ2?J2EbAMspb0zEq$}iDZk92x*e#!ntbPua} zECW3x(8=*?NP^tGcpR$%k=D_^7YbxWb(rb{Ij-VR@q0?`G5osnbjjVDTJs{UgdM4l zp63cslqKhGqystVykJMZ+7acrly&#zk_r6qp2vf0Z{faIoLA{WE0?8<*{i5hZxk9N zVUKvy{V|evlZh9Y;aaB`m}2?Dv|OT9ZeHzY@L z04MyqPI&WVQgE%+b$j4s;dc32r|$iq&7=yG?nzZS=^pd5wXdTN?|o{lzjXJjn9S`o zfZzG}mByA@D3*?oMw>Rr2MQhEUw1 zy2A9&%nHfcrBwi63{j?Q2+BRH(5*cm^eNl!ES}wl5CaqGrFEHu;&UHYh-Q|KEXInl zhFpd+g=fE&;nx;;op;a6=KyQ{);4~c*P$vYxGg7u5OSUSxk{AvxINC$%yY4Q&h!T5 zGu7$-uaq0synal*I)fd|!9TH%D&}u2-Ah%>TiLFgE0?%r!hJr}dDt~6yEm0(;1gnQ zh?CNjP0jPJWbyxtmh`Dw>|9U?@|oybxVN!h^}IOQ?QK`kc&0r4-|U)Fr)KtLsbOvK zlj>@fnUsCMtTtPGxoo8S7hUzS)5j;ntdW}J+&gWir07Yk_LJawg!FV7hr+wK8tnIg zv;Mpr`r3JYasU^3flaqA+t{Wq1{L-J%H({b!uLqhmdB*gs?(KS=B?%09M)?DgPuzr zK=jeQSe0?ovX{q+O(FjNWjX4WJEEWF?Gqx!ezLO0beKqigquh;%K1|rb!0%={wxPn zEucGU9{OLjSJSw!YyG>pUqOu>GU0n)U8*SlgG$pOhv76DCXr~I_hiUF@%)y}o6nf| zzLmB(+8_6S{11?R5$`%tQm|T<-aUGA1z}m^d+R zRe9iI01JG4(pgq<27Nf=peFO98qncc-^~#1&<t(<50W!x~;Rib^UJQ5zI($r+W{!jOp#i2IPSYkY?pCca|?9skK#lKs~RW5Su^s7ij2X?_ePw8l1l`yqaT$0`%c)b};c 
znyHle)m81=hL;3%!bzEK!VAJ^9#J%pRB`?a!^s0yo}emEgEQ?-w2Jdk&@!my#JeS@ z`*gTl^kB&6DoA#`^dZf5W#VShL|HzoMo%hj#L$Ai2_3pM@teUyYSde5}c~$!r8-4z^ANfH`=> zBcU_8I`o+n9Xj7UE;F2!F-~9iJRcO{6@0r+Qx&*B;@I)ppFsh>O_3aWr}ThlJ*gM! z_qK8dtSGnHmLHlFrx1F>wbP4R^4L>16Wp;-Qah-QE$P3W=rHP^$##^_TUBae7XiLw zy?CCq-P64({!{zOo-gw-F1BkZXOOXLR!QpJ6YtnC8{AEPZ0Vtl5dq#U6P0;^C4gRum6ZNELJ1Nyn24!bY2R0Im2;#uxv|8EuqGCvbfgXc3c}8F5Xyq zmrber^W)o|$4{O}xwI|1a1W)!43#=NoZ7+uDe4Q}ftv5ZIDm%D>wF$@5JGY!;EmAP z!dG3%ZNo(MW|=41e1bQdpO_n`n!fGs)36S4+}(RDJb>v15xM|_F&<04NV!Rd3hDzI zaMG&wS+%r~(mgL2uUXTovYjK@7gSIqO%)-@eEW7yI%8uT=fb!W<`uRnyl`o1(=sm| z^*81y+$gCH9W~8&Xj1;UepUku2^vi|0oxdz2`Mrh7kKT$gO zWKf^R%091Z0obD~7o;C#6RcBtB#aIVu0}p0X#?PF`kMOkq7oeS#C|`_(H9`!$Mo?z zk#Fe~?~#$0R}?!D|Nh$Rp^(zCCe$H0_a-&pcSrW&`*4G_LG}1IO8m&81XOrlUoo9mBGZQ3t8k>T~o8n${77i4N;SVVAgnxUmPl0lnH6@tTpaGv^eJk8{In0ei4*X(CqSyJ64P3PZa=BI&={C%=xVCN zox9Hf*bgu}zPLSIkyk{C30NWP57exVh95vty87UN*mVjVXH!7Y5* zhnPAqys*z#m;`7DKGod4qsUVxRcL~s9juW~W?`z472XT)N_YhO7^%Hs4zXTIbB#%M?U- zlbY5x8yNi+lD+jhv(xF_t4sxt%I*0xUgw8%3XiX{dQi`tae0hqf`hx`0taz?lemif zHjgQT8hg3wD9#C|o%+8@r>3L)0yO6u_;0woeZPC*SkvVmytZb_tA!y|2qtH@oK1;_ zWqF8H9m7Av=fMYrroF20Mrc=TU_lPIM$f><->>`@-~#~$O2}@+V_$op{HBOm74+du zB#EjEGeFaPvq?^=gWbg&q!aPY6x+|IZ&!${4uSbsHyajj{ord_>0aU5OJUeBe1=0f z#v*NM8?#bbGOE^o?>C(Qu_2}=dHah^U!*pE-Yxv3E|hvz{hNl^4c&&P(5h?QvK7OI zc!D=UH_Z?hu`;@ZaIl~b16Mq^aAdHv=~C59-PziR!8iGz9D+p4_@|oL7lju-c`S=b zzvij<_E4=V35=kT1}CvPXHz;g5Oh&k8A#5yuE2J7CTrgTzg~Y@wc)-vGuWaVLE!oK z(1N>{_|Ng@cc$A-QKZI=_Iq!M(6*azak1Gt8peuMZI)X#o~=9~Hr4A{>tUfymi|wJ z{MpYKsW`NePEOK{itlgg(2>q#gPqHDA33RuA}#$lTP?D7z1E*RImt}dGzimq9Ys62 z(WZn>FwWEGwuE{Wd7g-FjUT zTm0v8Ai~fTRReN`ea7Mon1^oz>ISTRAyCXWxL$uBgCyr5!~t`wP{z&wmZO8mJ?lf0 zB4pdGU$nwJn`A3~cTUwe&+%rI7L`9tpw;4{dh!P~vWdwIA5lPfJ2&Cx=2m4ki=d1@ zMt!B{w5G;won@K+kYknBxCBe@H7LyZgh1dNw2`wc&878i%8pxdFY`ejogZYQcq}_= z4|4PnXdo94{OpGf(1aXJHKraBymd844&E=^So6~QDM=`O_4p6d6E7Z%^M&>356r@H zD|XK8Kb5X_TW0zlh%oxTc@p)f)uf4E#hXo2fxGJyvDN%_=Ke`t=83+bJL#pD7!m;q z;juOpiy6aIdiC$y51Bvlzuwej)b@v3vy@4Y;`~9M0@Uo@$uJ6n@nl57_Fc)h|6dW_ z|GIB5{~`ac4A@^U^lt0@{OH6paeghD%17bD_&8gCAx6#-0M*e7P-J{d!xgzecnE1^I&OlcN#WTMZ-zH z2lSiHf4BBG_do6Lk}j(gx=~5jA57^Vt;~GTDoOVaDDS+HO+giZN+?bzEaJve$@d+=8ul0JpPVxcY+aasWi0<5iE5r2Nz*!I`*0V~dpRnnH7x2NOcRgZ+E!RptI> zXp{#7<)QsJO?q|CCqi=#@BhE+h_m zldm(}2`6pHGh2#F=}chy*$acSzYeB}e`PuDc|F&}cKGgFiE%)FL#aZ)KC zc50?njpz9cesI5EVZLlXOT(+yg?lzXXq&F7p3sgzrk4%|B9<^$8>lPA<@%pO<%~ys zW#RWI;hgPW%7Ikn5d=>)F2}OcU zXRqVw;c+G)?pzb>l9jH6Na)j~+LfoieZTM|sN{;-tJH6&S&y zc;rnoR>G>6sy8YZ{)U}sMh|Gpbzr5OIq8isgdYF%8&dj`tGyxdQTyNQXKO) ziK4Gxk5z{|>I(e`SD#ag7KJb`^Ziw?i^dwWGct<2fW0GNI^< zY8*~v|2(_7LL33tM|7PLE@uC%a*+1ZK(GY;-Ixb*B*zFUuhXo{^J{%f8UZI|CStvi zLa~=M7t?zhd(QTfpCP#Shy0%wA?si`l0w}O1Rti*Qqg+VxvDdv9r*c{G`@8GBEn43}5TZ&n>O8EnFrMXgttqCdiFjmEi(z zya)1sp~KXE@U5rPQ3!4`Is5N#NBHuTvyw!__>>bi1lGdlC_CX8W}|$I&i{%Tvm?+` z@z0Qs2LG;t~Fq0j(jC8|6d zNHCxXk2Qv0cfLL>I???8pZE+0=Db0hBkYzXzh5x(0{<%|2aZ4^{eY498t8_pk<{}A z_KewKVC|OFUU+6eczWd6>aPA;uc=*|-3(DD*nE#rod$#!Lh4?GS&vljob>*K1ZmzI z&Q*I}VsU`itWaVDpB}hpoG2U#;ek?hEzYx_*37=ndE0d)Yh<+?SA?xpFJ-Nt$?Or7 zUtD~_bjTkgw|6`tF)qX8K?PT3H`X%;D=9xN`iStjSH$FnA50xg+zI5rl!KJJfisi! 
z8AXnzPvmQ%tz_1-HP6pJOg(CLu;LvWF+Df6rz%#&ixuw(OYBd6s08P7Uc0 z!Hjuj3*EN45(SR>#E<#{?uT!AT;rXMCg^pBrDa!({R8fTPcsriJ!_3g!}Kr%q!)Li z>o#<{@B=xmo3rK>vB$}-MM_4LTvK=we_@Aufk89z;u~G=^$;vQoK7jW4KXc zyiZg5;)yC5lBq`=HX>06 zhI>S@_%E^+@s*`-u$&8W9ugM*>AtH$v&umc-97#G7y-|dcFo*>YEuT(O4@=o;wLdj zXFp~5*tWi~8Vp9TD-9^u;EmCo`vI7fZ(0Zb(ZB1f;?l#ZSa z0>c1-k3ef10l3H(ssY1TET-VYNusnG&j&q=XIgMy1*?h)ztZRoOf`uJ@-gc&%waX> zun0Mu0s)T6!6-Oh6=mma8OpIT-U;YoGY$;{k7ET9I6O;&y3TI^V;6?h&|{}tbmps& z?TvP1vtea+3U@n~Xui)kFZ4OCOuT#ENZz`gx8n!~vL`;u_v0jN+fIAh+u5{F?}JP5H#~J-oYrA?e)5ifsua+x*=>c;@-S zj#oGQ6>n-<(JE(nY;xP3bAh2`U+sQ)O=!VCDF$v0Epbd$KJTXP4WNifp)p(?)~ zDtVrjz9Y*eC~i^y>1EH1IMt!S;YohFdxPGx)M`JBl1Anu%f%j-1$rG7a7u{rUsYv7lHnQw*`tbKv)L({bI)32Mpu`)0% zDKT$dtbNVKPKYUWM%H~x;KN8w*qp96z80el_vp7)00gWQZFufam;*TM7-TA;n5>st z-)`JeN)K~xy7Ha%QK)EL(Aj4*r&HdV&m@S`Z-CRFP6Pyvdfp8RqH&@n16wAf%V%nr z(qwWqb?%YJoQ6b=?Dju!;E?uMjxZ6U1;6_62qWek(Hx;oeY*?Y<2j18S-(Ww^<(m) zL)bDSSiPLDZCHBu1)^_gk7--qGN*<`0VMRv`K_}T9GgY&OQ{nbFr>L^qlL_pg{Vv^ z%UE>~E<6CqCjTZVR(mKHCIQR_xNfJ9fdJoriya1^p`(SOhnz&6@0@Ck;P8n{d|9M8=v&)jbf*Cn5E=W@HpTu^+PAL=UgFO^&vE z1TO1)TBG!&awbB%UHn&QX-FYTh7S~cgX{y{8#f#VE5p>!F zo6ibl2e!kCdGn>1!FfK22PnVv^k|gNAbgZck}ga3C$=1wYe8+`Be zX`A$TKz?=|W1XNSNB2E9zsjniqg2p&93$7)$VY&;4evW=Q~yG2`mdZDE@u=Rg;}@P zG&ydC%=W|kowDgKScP^3hj+GmOour#%WGG+j_r4MkBR38kLR5yOxJ#UF?5$T%o}2i z+(E#0GwKghnhHWQc0eiS8QlVuey!PXUh0k0gD*8A8>7n&$fvK`R zeVLxy{g2RPg#{$ zR0?i`D&>+}Md>dE*P8+~E-&n5%Gaj(|6%%f`}{o^^1_htpZidMth$u^kUvNr9DuiL zID^Yq;6TY9O#w8R6a*BjN3}e1D909A@$JD+$OG4P|>*j05I|nEOOH?775C z#fRTv=f&Vh$y59-Sz@6s^AyX%lb72zhY{N$?K8@Z;1s3cE#BoJj*c};R2{1o>!@zO zMpPb@5e#{Up{zT&6hk~@H_l)k{1>wQ&aMu2Qs#XOkKUucyy~(vuW#fNI|%2jvivx` z?CX;PGS?fByW8wIuXo44o@i;yUvSghd9v}=Ewq02&(9Tw)}MXp(?Nx&PaLBmPOI6{ z>_Rt(`6Hc4k#fo?)_5GF%wqY`@a%(!BsvZ`c;r5g&#Yq1pn_HUOVn={y8KIJ@r=4l zUqi5|RKIxnf>t8UF8Z#&Z6wEd#(=_|r8og8!*y5mTNEKhhflkeb9+yI7}BXSxW*{u zEh*0vY z&+W}OxN#lLZy#p+@AU{|PgPwuc+WEf~b9G_g=Riy5TBXj%Aj%#Cvw6*Af(t2Nuf)Nu7aeytgyPoDHo*TI>*n zBMXX@9qM3!yG1VeGCBDP0iO)@dNC@*+klFE7u@XLh_RA^ zo9$w8l+^#?R)7n7a`UpP82zDq-ssue6^)-qT>!0_q&C?!gqVgD)!!yTuaot~@7aVd zHD@nP)i=RPK3x846xnP+CA+Ub!mcqD=$ibOdev*G&A#*5mmj=lvFc^k)@I2Tr7~mF zIX;7gJ(b$@M8mlc6L&|uMspA%&UeBh%A5%p>_|&6)PpcKWmFJNXPqm4J$GGwLs>>A za)sehr>dZ?i~Sx5-Np2`r)P)?Fe4Go_vO`oO9Y0?r;Z0SUHl+=Wlg zuS$$%l&Tdy+fUZL_jir)#8lbX3^u) z3mYY&{F4Y%RA`^tk&S)t{u*8_NSv5o?VpQ-Ig<7=tYO%e2WS|cGm*Fc?RwCc=rM(` z#7Lyy#>tbUJ2S*-wF+}}D`bGuvOeX7^l3UftZuv)+mgREL+QAhy!T?Li#c!X#pR`r zi$_nNp$5lz%_DyVChARV^l-IhsvQMk_ zAC8;sNDcmYF_1rbpy?ND0X#;=l4Fv-GZQndhxnB0eK?uU%k!@~Rky;8>g|8Cvm zIG8X|4sOkp&d;srFWtrcN4WWs5X3SoXMU|$yPB<+{|b|m$^6;M3AQL=$`N+2NyMx6 zcSPOiu}u5u!wP$b$)F|T$fPBN0Hl7nzY7@N@?;hhKK5^AQXg6ndO18Gf6J)1Uovos zvXiC{L)eg^Qkc_0+*ECn(<(g-9R+XAfq8Bc`de^~5(tM)zRC8f*YDn)Z%$uf{?el^ z?8c#B9uZNhwc6Srg>1R3DDYHbK{{y{gB@F=+`e^GYb`D z=D)bH{jpMrZV(M~&9ouF!ux?o^)0x469Syn``&R7BRp+CspU+7J$65tg4z{5mfGq_ z|5xZE-eI;Fp|J(W2Q{~%KC=KeYBYy(y` zn+;u=ifA-PTA6goKUEA)%l00=u=ih~#`7Rw!vW(>%C%akPB;NNn@yOMZP7qBS4#X! 
zCsfr-0!^atl^2~%+B+R09lJNL04VYR4apGx1@B&|0*eny-{r2*j-3EMZMBm(Z1jx-^k{%_dGmK zh~>>VfyBYd-ky^(o5+?gm?{kVY!Xn6ks5r>_y z02x(660is(r8vYyVJM_*@3~N7^AS8$i~wso_N|V;imd^yOcL|!GpQw6{wa%ijckn= zerB`fsj}X{Ulh+Ye(M*D4vvajDgp-^SSWQ|k7YIo3ATvo2REaw&fXJO{;jfsTh8D( z$2PD@P&;Rwq@;Y)5yrE>|5~aMQvZW5uYBF1YCoS($}PWt&4?xy@!p+VRd@V4KZwZc zn~=?`6JLeo`J_5^1J-j3wiJ}Wlr`iLEfJu29Q7-3PXq03hUt|Y)JX>(g?jrOfDWw# zOnwy5_%>`GjXd6K&GNXP1gcZ-je=V|(;bn}?iH{V$hUky=vlF0ra?5-cPd-LS)jj+ zW*psZ$wWn$Z5ZP)LN$$uAV`0Drreg=TkPR))s{8FNmIlC@@G?z@G$$ga?UXd^&z@z z8kPniS%&@2>hQX*{PFQuztsyYqt7YH?lns#nPeOc??hONIg#V!W|8=IdC#SxDQNq;Jh5=DX$d z?`hf`OUo?e^!f?XAOKNluOeK$s_(&ZP) z$2NOo)!^t8$SfIRCz|IoxZ80^cSXzQv<6n94jIoR^*&SmYZIHlsCF_%@%jeZf0OoW z1)<|xTGB^C{nBcGl|fPLzrMqO`BpT9$`JZ3bl$f%gv0cv$sZ4yjDIHw#jz)TH!f! z0`}o3FdL;yR_F=|@)j+OFP=%yq+PzJbA%P0h`vC#ad@>P!%383vv8e#ZldV;-J!34 zPc0GQnCJw!dbUxKFR=K!_9J4(c}Vv`=)=xauMH(>;kk4#3sBHb=JtqP@B^O{_nH}X z2|&$x8^f8p6c~~za$>{mpAYE_Zf^?cq3tk$0cV8Y>zw(UQfk@S=|JaF^Fth_thzb+ z@s3_Qobyz5B8TX7XOy<>MDTbZM8`jofNyV}eA8;1-j?>=*gSauL&+}2 zv(MV7EvvHT^xy3ejpEg)nClyhg|S`FXxdQ@v#dN4%4H25S=!2^=56}K>o@lYK4~;) z6io0d^+c)=kC_hz?>wt&m62EU;SRhj@I~m zf0|n=`0!Nith}7zn*(n5^w?iw7}oFt%HL1D0-sIf-`;tGMv4n|P1(7rcwZ&32uEbfk<@nor<=g<8A8d}GgDRUD1~j$Lwl?I%5u zVH|P|UJ!_jq(bMfFf*-=%r$FQa+gMADU}m-Nk!>@m_GX4jbR}y9+oNp#I665l6A)Q zHY*J%33@muR+@Fp2|-BT6iXu{pSG^CIpwFCkBlCgXW0RCBcI^K1#P?q_`XNVVDyXV zA%Mz}nqP*!P!~A=9psj9EW0wP-UelzZX=u>0Nou3c1amn>*q-8w(g$D+A?nen4n|g z)L;fuMXrS$Rf&2o{Z#{P5}l8)A%FgQ_T@VTDUb^({=mz*6%&msVlM60!sSi&Tg``Rsd4CR6HMgmCFtGnF;tTR)Y8Wt! z1HNdbA3!R-w_u)=ywC*Q2%1xPUw+ejUOsEpWwi%F&Z=l40_Sp_Z3m6z%6{ya-i=gl zseZx_m*!u{?vGF<9opvocJHsD$<1yM$=?{}zS9eba7ihb4$iH5= zOq^;dDVlNk4upaLk6;FdOM=9LA6Vdoiq77xTb8h+>8ntZx9nAA=n*oS+&otFL z%+G@HWOO#=t_$0#+>^Wr)h0DlDn3O18z47VyIbnAZ1yu|lE%|NMQcb5jN`F1-c%@J zB#5&o%;{*5Y*I!BQeZdrf!?$LZeuTQBPMzGYy z&*y{|xZ41f84ui(9uLg9zBp?)1eZqYC)!DS$$7R$-di2Cu;_3lw>%f|;F1t741bp# z6OvFwc4$bvn^n*#T4oUBa`rlNemivf6*8h+NabS_H%6DmZ-*qvpUd)E8jwKkt9bm#|mPah9V#QJH}@fO26>9Gl) zSR?KjJbl{z(>|O$AfJ>oSugszk{xZqYDOU^Q{L#l7&$PKdyoo+q|Zw;8ETQq0kNZ(?h4)^;G4#v=wyxmX|by0$GM9?G5TC_ebolHtSb&)<>-xqb=A> z>W(YQk8Hxb5hLo8SVzZhoiGdUmA@%Qf6oy^ndxcf-1$0u8ke^1zHs;`KuzWk#$Z!3 zCNnMI{g$l8Vcx2A(D}ao_?kY0aDb+L9)Vmg9}Hb!N`7@KFxM~e`_E^ox42JR_GqvE z+HZ$M@ls|YA`iea5rI1VjA~DH%K>->9kUd37v_y}GKNJo)txdG!FmP7nJ!L0`@?jV zDcLRcr}%MBv4RP8dc3tBzBmz0I1YK)ZdJeKsD=+d@xjl6a4me{h2cMzFOt7lyju_6 zdp*IwP%=!dXafAoJ8T-8Swj%4$x#BUeQGpq)apbqWF;zS1xJ|)H>?KErFNMuyo+6F zKd%1$A3)>c{WNpk=T!&f*xE5f_#Y-&oDU9%Kg7lS8@87oGC~k5Bku&aR1hH0No5~{ z!i#^u`uLKpo%-TP+pBKZ($Hi^&*e@_T7mq;$SqdBw_fvHja`eztm?zSi^dgXl==E@TSV*3`WDpj^mFrE&kDL%cCk#D+LkpG^WLjQJ2V0!cG%=gdxAhnb&LDkLl37~x;1n2>c zXaM2%bAszqBB~RQ5Y^^ONv)-gk&Mxox3c}1`}Pzovmn(eCmv))NM1;9xm`Sl;d0-A z?E@hI9DXW$YmA>*5jT&PEK_8Jen{ZJ#OxoK)+qW#`_mqt^TY*t5GE< z-V`9bXKn6Hw@8jhK!08Qx7VL7HS7jPqpwNre!So0+JAa4qlvL#tChV+W~CrVF}Z01 zAu=+vifY;==!)GjuH(f^FKAl1Zsy@cV7bCMP2pOY=xnh1Xw1#ejSVD>ss%#+Y)u0c z!0}nD?&9Qke*W7H9IUd58Kto$+XsS^p#9=~`o}*^Mk|wcfkB-bxgSdwR?00o6R}7B z$|nt@O-tb`0~x=jTY2CR0l5in-4$4=(A!WeDj3Q&e7c*RU9al`wF}}MWy8}=ps4Npmql|FPTgnuvWWQX@rrusqbarW2$e>%O-<;h|Eu=R{Acuk=b?0? 
z&M>++y=U*8-=^ox9adO#jy}7)!kh4v0;TIf|DOs+5%O<|DGtoYwK5rGM@ zH!KUm+WS)EEXnOH6U@K@rN|-xBU0|gAse5OYnJy!Thbh2{mEQKqEV_R2Ws+srQc{Y ziT-gq`1z?6{rZmJJahXj7qs>E7)N=t%N*nJq;ZGZn593&(BrO)?YfMPXr7_DpgmDc z(n9GHfKV~;`HQ|CZYPm;=>k<$SpJnaVzE8#`c}lB7)1Pl{3h5GrHV=U)FSykTXj3L zEsy^CTGCu(_^@JlaPJ;3IpsO~DBARL^6Ly-_B^}Wrg$GHRC7Eji$_Gj9Va*@c;){bBNJh&g+^6f#rJ>l@~K9bh&46+_}kCVvLpRfz{g}yB=hUQ=Z}-R#h;ak z=H~e%T9uR9WHdvTO(0u+a#&cA*ZZAT)3`TCj?n>0illBw4hFCmx;I{`E^aKl0lJ+H z6&_*yc2Xi?^IHkwZ?o-kryzb>I{8W7cG5C_#&OHXs>?ezKyF|I45nF2(MTta-AokvSU3b3c_r1^WKIhJ# zIr$@h$}*5~7z&V0_}kgsI7 z%g1dW^uOcU7rs9^N`J6kNA&)O+D!QzbH#-1YOJGXkglIH>3RmGB&BC8nO=s!ymfNP z5dq4R2?A7yYA`M}5yqePz#Wgzr#YTq$+F7}*ENZ>0O#87XMk$FeM#>(^lcJ+vE%@oX+Y+%OkN>8d5cyLB>KmN;^_3zwiE8^$`xAO9 zQOw>)p^ews2b5dYV9IGdlEUNbQ=-KwjdC4nz`%I2od25H~NgBZt~*+Tt$ntPsbTUo#0@pr)$V z$WR=W8ZXVcl?qeH^~mmTZ;GxX_LO^JBtJYL{J0YJhfmarjr?Wl;p6kWVcl6qKj%GP zf6!y~~ z_D)ZTdh`{TSkqh6zHrZR)@?oAgpEwklHDs8h$L3>yCH>>R)>{e8$D{TQYL3IhHZvM z#!glu*uoBohgzPdE#!=&uuzN>8s~f}I$T+@>^Ef(>9-)IXAGU5u%$MZm-N#XFLLxe z(r4|rc|d0tBaPB`IZuN&8CsIp)5o;t9NO+=jaIs)du9lbBX^;FVv~sjO&goSXD!K? zJZyGI;GZI$b4~5vJp>Y(w{nIJ8Nv!R!HBulz^8Ru*a}yULZ^Nm3I0K9D8s)NHe#C# zLR3fxC;ZLjM6Lh*o4BE}fqv1wI?3(GQFygdZS9_4ENy0wd)2iCi={o0R)fVZyTXkf zQv_=ZHo!J(!lE~{*L~>u9c!tCZ{XhX?hNgb|7h&ZaE-*YF>R;9@4L8ewGTmKW4`C0 zW2b6-_p_3M^5xg8xSE|LMuFc(n?GqkzJOM9#sBlX?7EJ{lHucg@!n%}j6h21-egR7 zY=E9J0V1rC>BKI)t2d#|*k1%t3eyqP*8!{0#RR`bg=kK2-388f9Z6 zxzTbZDh(*p-pPdl!=wss*Jm4SuvXg5P+Bp^7K{|(mSmZ2CvkG;1{UI?OS zwk0^VWD{@AYSun<_ME1S5aVW08Pokb8y(X_S>g$03iz5Uuwjeum@S+HbagYs-3ueq zJ^ZduGFp;A=|2^3J<6253wy+I{Y}6c{_6I_go~-vjGIzrwi>~<%60^~XC_4pXf(6*U%_-WZR3Km2jFOW;7 z+V zy0=BY18vkL=_{ula}6x$$|zDi;aWU*&O|hVIw?khHJ*c5eqwIRJT}p+HcDZr-Y`Xc z!oK=|j{S>(5ap6vl@B@yP&A@CAeQEhjax?=wHFw=dB>M+$ZlD!EO>!oWuWblY|D&A z?fo%{mmKl+BmbmdO9L5XU%eJPxU_JlT=k2CYyXrT#hz_EkJ6y#!Spg8Gc^OA;=FF1u-pK+0G^~a5`_i}g_dy1o?T1T=xavwC zF!ij#;hAbdo2{tYm;cy=6Dj5{_Bcb|CE>>?wf6oD>rupyLdT^a%8WKVK@=fJk9m9Y zg|^w^YqvO$**X<_Vw5c?v(`fvRT%{)Rd448s|lE%&2uA4VulT1@pe&to(GBT{%*9n z(Y#br)}7CG0l{FTYKQs%uy$dP_? 
z6{b~E`YyRGe;Fb>0<4>$(l)%P6b`T9h^0l^jj;b{uJdm*{Jd>t(;jul(|?FPTphbt zz7^n{A*TF@BHZ$s7n;rSeC@PW>}PD6ZT~g*f2M8J8yMR-<_cJS*JJa-pBgC2Plk)@ z6$3n!@7~q*hdYH^1uti?%i|Lp%S=?Qy*8r|xIehWaQ(`x9a_%UJ=;q+iT>Z&!yBa+ zbUxOrbPak_s3 z3c4BkO1te9`+XK*=hIFHUN@=5H(IZ)Nb6d&L{#bZ)~qoR5FRx zwKcf2ySQ0-?I{mMxjUjrOmDyCTGqEp@vBm#^72_ql=+Mey~LNXs!n@|{&jd${cedq zb3zSn6Cs^zOI{Gsbz;Z5=uhlv`CQ)KiCek#{NZmY+Ka;a{?r{Y9oCPX#NhIpr)5x0 z2RF?#WzA4w^0y3%VTauYyXpMAG4d2sJ8Pz@DtXDEn`LHXLH`R`@!{pMN+s)EHa%a`eDO`Q_4k z%CWs~c(DgQUlrIP+tbxIU}wj5p_N38$B8&1#AB0M@cZRC(UM}U2Hi^|9ZMktYLVqq+}PmumQQm`bSgO%DbVH?BfIsY|Mww2AhRz~xcs^|O*ZR3v?pL_jJy8cy%jA$$dX~>XXtOY-MbV#P^Q=Tf=Eg+{0skKjq)WkV)!t1N(v#N;@l!BOZ<9)% zmiy3o`Eb?;>qhM+lSYqKu$Z{7nF;J}K?ZJg!wE@tK>p1#a|9WK!K_ns!rnN%XYU@R zdOp71Rit;!pWh8z!bEn(b=Ymg?czENN;HyXT#b;dH?`**TS!43t0b>%n@l3cjM2~p zJ(>qj4}vdF7F~K#fl5Gkn5h6J>3>Ptca6I96}F=~3hu7*i0q7g+)t%D_S1h8`SDmd`09|9-Q(oHvsi>i#pB4qWCi2QO_HYQ!qbbtT>=99IHUT9lhYSY+no--fSzdZ;G(qW+&&BuJX@wPK@h z0oy31i65BG!RDw5bT8KDI=~+;fZ!F`W`{LaNKxLFsA?+v)-=Mr#8ut?#_PK6dtbHK zIDX6kDz1vDzAlyhItQTb@36REIO!15>xII%Y+g*|kv6lBsu0m$;QV#ZyiD9ii5SkR zXk>M&cpeuBc50GR4y<~3Wghl#{mi{oLHAGN?LBlgu)pqece=lsHWT;KCjctq&~BBn zJza5O&%g$w!aUe}i#?sN%9!51)2)7%36joPYY_T)1VW~az+ReNk0gt{$>DlCP%fGg zV^;a)kz`c|P4pG(Jnn1u2*3bv>T*Dhx}}`X-x!xJ%q?te8B(l}^n(~28G;F66{#@A z-P2kmRJx+v7kG7WQDaMF1onGygYUj_I-i2WUee1az99<^bACaMy<*lDTC&4WF!Qil zf2y1fLy#T*xn8_A@cR^G*jzNfACxhf9tzz=Mu-=RvVX;PynI1jP_l<#ae0>D6hI!-fYF00XG{O9B{iNomKM1hJqcJpKo3HY-XdlKomc^%_d0CR4vE* zYG1ct%~@^I%%=`Mx1Ct}n@8J^OO9|a= z)k4~=j1YC6$eak$6|bfZ(>X^+`>5s*b%YW0mS&X<;M4;K?CSkTb86anpsQwK z8I-NJp@a-7?yod)DXdE!)G>cyBfwXwAQF1_O3$lLw0b%Y52GGH*-Oy^7(%1fM!Cmv z6J**<@R+pPV6_nzQnz0-ZZSpJJa969YVt#bmsbA3^UD+FT!xn3hurtZNhYjf6$7=E zO$n=;cGjlcYWI=8 z17#V^^@86sfa5j>Tg{D5mz+^LII;6qXNS4oRH`il7#sQ#Cq(6T6R;#~0bmh_a9u-c zs%wsV8fHwC@EEV>)Ic>^+Oj>gPeSrbYK|>_OsK`&>q8%s4PV??dR_n8#mRkhH__uh zv6@F#E}f-gQ8;obAjCv?w1T_^U9JVK67d!-0S1QIt1CqVB-2Z)jC4=Z*+9z#!B0O^ zaY6_Q38F(DiR1zR%^#5_OplH@SC$xiSZrsyarQEE7i>Cd?>Cpyv3ZlZdGN;GgF~O> z$vm9@g0Rg|M%EALP1(Gu%$jlEu7lhhQ=5Yw{t*_d9QbPaZ;h|^{cZF)@UOJYqAi2h zN^{w_WU_F$xL9nJf`K^+0^G%jG!#U=>jUgGPGztP4Q-f!si>&RK){~c77FNEgpdnD zj+B{Ys@Tyw^(($8RuJP)W4guh;wa05PhH9+lDw&dI(VcnfcJXT#ex(8AA;Vs# zpc%QDr)up=-h{O%D+ev6`(lXtI$c^yb$Hz~f zlcgHe54XN)B02!kEtAFEplo7fJ17WGXvCNLE_d>QGmoC;o z(_NM2Lc-MJA?nuPgt-!9HMoSxvt~T)0!r;MDM6!cGH;C+5n&4f|Ix5P6ggG85uto( zgsMUtwb#wB*^X5Gow#4zxKf9QP4xcTIc9s+O4=JM;7VV)`!4m@YUP=}-Db(|F=s4iz3@+$4dLY+Phldd9FU|7q?JhF* z1r|(co;7KDPp_8mhgUN!_k2SEmWt>kkA7x@8%PKKl8dNs0+Q6<9tCHyi%{+4yVnSVM!eQ&bX`9#Y~_w1pSoA^NJ z7cYg0+kx}2v^8&Q5ubkKabPEy)ZHYOIYMD}1MCJ&-?Mum_^(oV?QXE@o2hSsC%Yvm zOhVa^KsomuHgBTCPR3*U(wYGFLN_Y%8Jgyai1NBz=biHHeY!EmOjU1=?->fo=86McizmD2s$K-`S=h3RvlfkPU)KC^xRe~aRVi`vXu z2vDEH2(xCgINxEkA`F_uSurU+`{MNKg^5}3`d6y@7y$@Y^t#L)j%w~21@i|49+)<& zptZJJDsn`Oo&-s{N%rhoTdT7UeRN84xKs}{@cO=N>XK_^RjaNg5eLBRu{XB|AOsbj zk81%z8A5&ebYepnORQX@-6ndIqh&&JBRjeUCei(mK(BKh3}ujP{nq7tMqGkN?G&y< zd62#_ddwSiSZkqYHBgbQqvQ7J*KUR++1%X9EAP{M^W~Er`VE?dyHR1h1rWDsMPX|p z6EDsRel3YIg;I4HIukd1twQ;UUDk|k(Wh86HoR73v}IkDwLvcrU|42SqF76% z6PHp@0e;He<)tWN{4rukBQaIk0kG07F`S}gQkwz+ckRz#3^XTYndURnt1V1*-*){r zrtA@n85)=Fw@_iyy0b)9JU{3a8V#@%LhW>_MS3S9Eo5t`#qEqGL;dTp;qfp_D{%c^ zG{E1%g(LJH;`v`Kq&a2PE~5;eo8hLwt3>^1u&YtD#L0z4<;=|Ta6i%+WiQ|&mA<2v zupfM$EE()6lLwlQ$Zqe`RU9%J`zyI|CKsnH0r?-d`^}pthEV?&B2g_HLiwzOFKhqn zzI8RReew9!_3->=iyq33uNMH#7B>3(`{#U1YcCV)%X4Z5Sp}lmlPR%Kz&s?}O+h(f zzfVu>XP3OiXygek9tS~voj5#M^{=EE)VeR|u_uL>t$_U(2BbQgfoIo`+h!y3Houj9 z4CkkKyi`8Ayq@iPpw9wLd|WYC>UPuyNmlOq1%*=Kkw!&N$KPsQYVIg?Smr#+ulthD0gR(h=>-fYMsBj*D>H=qn=EbXoEmi~pCwAvm&|tUu_~21kcZRT#7DG(nl)^=dW4h*X#JApA{kT8B*i4Uo 
zW^c#S#+U!Y6ZP8gFz)E__T0~{dy*)(Oa`%;DQbC5Q8^4o zfsB32H?D)tiSR!~IV>20#6ns*dU%0*AS}JvPhLDjbOE<2@44%U)2K3US>vkgb%(KV zar6u6EtcTGl}3`=sWMD@_@9vXyUadzfuCU$tFC=bQ3l&f_Z@SoHOt%*HoA0!B($B2 zpf+pV;(s&+a5yr!u0Zsqf)Q0}LqMFQcQVk+<2u|UL%gW8I4z^^H7vk)EgAD9_unbdqE|%_0#$O|CPhk2-g$Msaik1$7AEMNe8IbF&!+VCT(z>OO(l*7yOyx2yV#V&V=eK&*ZK#W;oLU z*v~6){Nb_=v4+<8TVECk3RNMkbO_+CNsRV=1phh;$+s&tyR?3^!!;9-{A+Z5^HTDh zRg&ZG_v>*vbK*Bi=ayo z;q|pGr=STPR`zbjY7gJB!t9pGZ@OBBS*MJYt;9*L*;v4bpZ4CDT*q1cebuY=Z7v<* znx+QnW%MNvK1L2kqs0Tw^?X!Aa|iS2O@#(o_!h0X+eW3KFO}6PDyBB<$Eq)Y*XnjJ zDmxWl(Fd_KB|Oxsao`Nde|Y*f*-x~E$Z1^K)TLFjB?J;`F9&D9QZ_D1o5*@%k~J;m z@KoI3v(K-F7|n!+wpot-KCTS-1Am=?NKhu&nc^vnBlJ9Hlc0LcVxRY+zDr z+IDKPf91Uib!j#O=Et7qj6V}JZkp0{wFQn}r}xxqr~AbQHxm&C>fO)f!Ra8(#=Sg9 zra@;5$36GF2h{{BP4mF4Owk(vzeJOVu;!3Lc*vX!hmiUjZ=`ISa*JEL2if44(sdOcMwI0LJ2j+$OLK zk$ppFgJQB%Lf8kv8F30v=v~exFQ(-Dm)Xx(qdwM9*M;qJP%W$nWFXRdt#E@N3Ii&H z9B>A;rW`izk3%9;deKPKg5^l&R~k{8Au2=g1DKNW4$)qwBdq+_>ZH>{GK81|0;UYUad6jxQaUL<+gR8na?m} z4TE%vCD(19f{7j_&{k?w)FP}dZOJ6CrNE*EZbfSJLRL)YfEA<{9=sg&NlQzb7k}s- za;+kAD^g;pkH&N($qR$UZf;<*r!K^Kt=uO~ORi087B^0TZLni9S$$i`X~~pwz)KpQ z;J)vDY5D)EiT@7f^IpHzbET=QXg%Gmg}mj`QeIn2b$!bP=J&7y%|R>U`Oy1MX_U73kTImqx?$ggGN)2j)~Nisl9jL8APJVcNzOP<{aOXC&zEiw;- zEX2OU*70BnDf=SqK0-tO^8-Yxj5b+0Vw-0E@Y6q19oK0i{Izavmg6BS{`05SN2WG+ zwn&qN{#&&N#Tx>ZHPpH22zMj(GN~_2_TBh9;8W5#;2}Q6=X!CIbT(ayc)}8aL2boMH9C zegvO6u7$*9&-I}$MBA2OA%(W^;zzP>HC6SqS!x zsfzPv656K>TEOCvuJP`p>by!XYBDWs;z-dW9w7O$bh%^E?DbC7#;1;}3f*@%mAr*X z?+rnc32-ZXQkEoiDNGnboC;Dl6Vd>^PESm1wzNpf8t}qXpaV*L(#u-+r67Je&U~UC zkHjI5RQ`RQnfmvw>Q)(`zBPnM!fSAb28USLU_|K?oXid^6X7N&IH4aH9%Anu-- z&@9TigAY4H1zAnC1U2LKhgjW0Es%9zQ$EU~;^SfD9`*T2+uDe2uF#a|$C%J#Uw%#THLLt&wAQ2DRuQr-)Tp$W>i+SJAZZu19ip+R{Eb}1TMxs_-1!P zoP}=*`Dy1%M6s~s;f@Q}XV1to<4tExCM!JN{}+VB`yb(dGe5%QA7UOQZ>iUnm*Y{~ zP7!J!?^2aT5K!b`5g>{RjVrNgZo|Azc$_>^QEyHGgC3~%DZwXAr`(8ur3XSP^=441=fvQnRqzEiSqsu>MQ z7Tv7-!sz5B@_m<5?^_Xn8f(+N?=dmVm(`qk5a)9Fa4$H9N zarKvkR~Vx7@6VeWk_@>xUHSxLTH%KRyuttYSfBkaJCmPg25Ow@tg$%V$B|n^1L5h( zO_1R8us(E|por`vylu1ho;|8=p|7qaM8SOCtd^*1wqRlWAMHVi!`Q?B)wf;h0Lurq=78 z?#0Nk6~S2+5B=wghAMlNWrw89yDP+a$K~0?Tr5~=58^vY2-}mBo4QaA!hCU4KP9!( zAUJBaXG3VVwDLeU`+@RH)A8{qaljFe2V=ev;%_eB4SCqc2k8ZM=TE#skwLU+F3phm zSV+EUuw`pb3%elD5J_A4l7^K375b!mO=Os0bpCcYkZeT5fFDB4Y_nS~N|zopXM z#&#Wc?|!omZ+aPI`Ago|s>^I+=kt0_?6&l>wS2gwerAj+_;a(Rgs0}?YB4I)GNja%l8&tyMTU2u zthX)+J1igXM>x;2-ngeOemCpNWSwkhuBYLc6b=1u+OFAr$QJXQU&%V=z@46Va~r-Y zH!uVEH%HW)YiQ>S}lcps=Bg6{TOOz2&3Hbz_b_Ioy? 
z9Z4_GINhElpyek#+8k&X*H?AUzK2<>?qiP(eF4fzzXveygGpSdH{ov2FEw^MZ?9o$8 zQSbTfdxbmj_Po8sNP&!^cxKLUZ@7^uAw)YO?^J2>u&Tk|uHUfHs0@)LSE5_dG+{Kc zJ3<`R3GokD%K!R*AEVc4?5!qbYGk_VpWat0iO~vl+0tQO4LQlv){ql7vQhfHun9-T zcW57Z?qWc47bG&FI7@4DiKl?6u`v~)(TGB5)f|mr0jVepqa2b2SaNtj=vY59Qab!5 z&$EUo>UuZ(0oyEVA>Y8gG!OqFwl1&q2gSv`^%iav+N_ebe?8Tj>e?hNu1iGfcTKX< zZ~R9Cbz*gRKLpX*Db0@I_P2B4eDg&!4C{{toHdo2EWW3PWP+iyIg|-Tik=wKOmgP@ z1(F)%vf`zGUn+f~M=s6u1T6 z_AcWRu%MrA;!Flx#_yUYm)yXxsbO~b+AUS(ZfUDhPWL71Upd{JtzPe^759klwsR^E zL8)8&SL8;_e`g0J7^8p6_eq=$>5Z-jK;^~v&pvK?h~1O+?H=dYOi?UK(Bohi7ItL( zRk@t|ROwFgHy!5KQq*vHOJCZ!uItKBIY0oBXfUStU3<4eiZ;G~N9Nh*KB;fkNewQy zFyTe{BT1Rx;g;#(lCsFAy9TM8pk(fk)mk%XzEUUK&{(5S>PNfdHBQ{ z{a$orl#gVR3)`)3s*4uM!7tdWg4bIq8LwU9UlP@)@pfr3GnsQnKejsJ+C{aoT*V~_-=RXDo7Tts?Q`8MwFjJ zxl$Q|y9MECi*<6OX^)P658;T$zbEEwQY4(&?fr}QH%AJ`RlWm;n(_V6JqYMQ7R4hL= zt#OelI^3n+9G9bEH4`t@0(`E$y^%a8%KRbi`=#dkVutXU%+MXqB=_vi6><~$BC$(u zdty_dQm=ydSuv7BiFcoU=GsG5=YMYD78?5#r7Z`V}JnD7k1FZBkfMD}R21SLm#?-+rYjx3N zUt&WwljNj&<5aM!qrZ})>B9%Vf}ijp_1!6PkL;1GPm7A?Oc}^<6Xe~UMts^qVp{~i zv;Rm}nBZXUhG~q2P&>FgGI71vY64i1Ro6^#xFgI?%`ACIcyj*gJ0#-dB0HrAD1TvxG^dd7!;Y554Lx{ymH}w5O~|ILUxlr zao+F5m)%EcPjooLz5!UFW}Wy$x@SKZ_o=9Ku>?rZox~4tQ>VA!iP*=~noAP}^%JRO zQ_frMb@cFwIeg}-HGR0*uh<+SYCT#7x{r9z+CmqWec z!$r$irXK#0Nqpl~+u{Ihial&UCHgaPS6OOoCf$^GQgTeLhCOafv&s5DQfMm4TE;m$ z5P}JGBV8;l$Co|9>#7)m|D&-PbMXDt{{NRm(;!YS>n@u%ujwDP7F@QiZJkm=CWz#M z;_0ZRBl49c8>UvFWys;d=hUUnG%_eqv91@jOtJi zmXyJcM_QcC%1cW9k|O-C0wD<{vESG>%cNI{Ic=fewRfcSkh@EI6E{%DcUDus!4l1DW|i{Y&vel>+q{RL|D$;ycW!j? zRVJ)0disjA74-QZ@pBTyW54&jjTzeLpYRx3$qIeV`~CR&bETp^cg$9J@1L8Ct!d5u z0+(;*hS+w-akc%vsDNkkLO9yEZwAVql1#~%6wGaH1|9zuLwMl?2x&I7zI0i8%)hTZ zqjs};UF{=}$YO{yB!|MPIJ)Mryf^>FR8n-*SdzOMv%~#7TkFqZaKc|#JVGrp^MPQ` zy^z#)hH+oKCec67q3h#+G)$`eRv`2@SH%c~)w1G)xwqjM29T@S#?g1VDdNGC7jHvq zERb-|#WublPV(o|Y+9@keb)HW*{Z+vDsqBGkUdSq65d&m5L8Kv5j$1`TADnLpR3@L z?3bc%dI%4edcSPsB|Y#x_#**OXsZAm4OK?(X^NH3UngP(bM7oWyBx%R3Qrqct{yuO|K}K} zPq(~iseaS6v#TAW%yd*W$`7sd9EEif6VGlR-3x51%{5XR@M60BWb%D3+z;#tf#46a!FxT>H#PiwdkD+SOK1Gl}QSQg_v;UB^ zs{KvsUQ2K{BHIz#C1Ak?me{;e4_uwQr_D&srD)%=Gp|9r;pOnaBr`J0Zqn#|s7A43 zwP6ZVY-)y1b~)Swl(Zhn)iKHY+2uc)?Qcowi=1>+GBtWB;TwQ#E}JRs0|obOER!i9 z(J72E$>q~e$~5!wky)bAj%bIQ8Kc}6NO4c=0|qTq<=eq80Zy7UD&+Q-d9oa!uIs$l zR%%n?H)K{ysM+d`jH3KYzvm+T_J`id$h-cv8KJFsMguDW7P9>12MhV$U?Z`zcfCR) z4-|T`7?TS`_AL8!E~AW(Gh`29In$AGMO(oQVj*Emam~CXuC6c^oy4xIM_n{8s}$AQ z9!aF{ErqQ0kSoWi;hZ1yt1`}r_-78==Svat zF2t~-snWS<5(RBBH&?KUwAkGr^Q)*VQ~qD@)s>1BCS{mpkq=s_sml1pYaOMUdnnWu zk>47n^Pg@&ib%kxq(Gn>DeOsOUFQU_ZUktBD+GFymSJjAQIM@;AzZuu`|9aq9ZI`J zxaD!W=Pr1_32Ad;eeaLhpAF%P;DjlqcL6@kwyjx(Jt2PQ%tJ1pES4oVdAjBVt}6yG z=QX$*C1gNsydYr!38+P{VzLL7bgF79EQ8Qug7e>O7n)`?BY(w&ov9$rOYXP)J>w@9 z{yl$ujM?#!F z?vjKNd()-jp$i7&28`s%j%KEqfETl>&t)&NH!JMlpLIE|$i6YteLOrm{Yrjza}JK3 zr2YNT+Lz&}h}p`q`%|u~H}Ns@S`ySfjrYjypb(&N9}r{HT)C?=e>*p-;}#x>NVwZMs;_Q+uU+PmMdHgTyWyW)&5 zDO9C%Y`}K_U?ht2{v3AX)pB%(**b4ecGuNb7z}uN;8!|{@f_HUiPXa?;Aw8swkp|L$d5-JJ|Kn#Jfh=%R6Jw z#d9%=`Sapo5hW2GeS|Ui!tV{#YD(zwa#ub6V*MnjvDPdXo>5uoof5y|5;&yT+|)F> z$UHTv+@-xa$7MP}UvS{vkt*^JO~<`KZ^@qPXhMvlzftr<#P5rx=7sSF@Tt}MMnnXu zJ|BuPM7L;%+2|r3$m$?*e1{bkLe0iGT&y#_ zxgXA);eDc<{wZja!yMbP(llMTSvwxo7LnOUlsznPE@-;#NzOYx8*AJPBd_%bc9*`q z^x5mpceJJZ)?;jG%j$XM=T$z1e*p)9y1QNb#0{Gu=Zb9n0zy~nmUO|MPGp#e6AIEK zRvGp5pnh6Pfc>mz%W;!KB`f!QT!702$U*+0QJFKj(Y-_;l7%$y5eM$!!eWZEA^@KlN;O;9N0vF`k&QK zP+w!M6QT}M>DY}vSPg7^4f+HdbaK?>xfqUPdNz0S^Aq2oQzoDEUYd=LyP-*&5iEut zO?_r3{*b`d)5?~K;qFua*JcZ5q38SQO)DNYFXm5t#l#I+0{5=DepqyVXv=**@HEmo z_?JSrWKUiTo$`RnwEqTmOhm%#X5@GFd9=W$^tBF$#n8GKm*v*!FVHFZ1ky9}8=2

[Unreadable base85-encoded git binary patch data omitted; nothing human-readable could be recovered from this span.]

Date: Wed, 10 Jan 2024 23:18:04 +0900 Subject: [PATCH 171/521] [SPARK-46547][SS] Swallow non-fatal exception in
maintenance task to avoid deadlock between maintenance thread and streaming aggregation operator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? Swallow non-fatal exception in maintenance task to avoid deadlock between maintenance thread and streaming aggregation operator ### Why are the changes needed? This change fixes a race condition that causes a deadlock between the task thread and the maintenance thread. This is primarily only possible with the streaming aggregation operator. In this case, we use 2 physical operators - `StateStoreRestoreExec` and `StateStoreSaveExec`. The first one opens the store in read-only mode and the 2nd one does the actual commit. However, the following sequence of events creates an issue 1. Task thread runs the `StateStoreRestoreExec` and gets the store instance and thereby the DB instance lock 2. Maintenance thread fails with an error for some reason 3. Maintenance thread takes the `loadedProviders` lock and tries to call `close` on all the loaded providers 4. Task thread tries to execute the StateStoreRDD for the `StateStoreSaveExec` operator and tries to acquire the `loadedProviders` lock which is held by the thread above So basically if the maintenance thread is interleaved between the `restore/save` operations, there is a deadlock condition based on the `loadedProviders` lock and the DB instance lock. The fix proposes to simply release the resources at the end of the `StateStoreRestoreExec` operator (note that `abort` for `ReadStateStore` is likely a misnomer - but we choose to follow the already provided API in this case) Relevant Logs: Link - https://github.com/anishshri-db/spark/actions/runs/7356847259/job/20027577445?pr=4 ``` 2023-12-27T09:59:02.6362466Z 09:59:02.635 WARN org.apache.spark.sql.execution.streaming.state.StateStore: Error in maintenanceThreadPool 2023-12-27T09:59:02.6365616Z java.io.FileNotFoundException: File file:/home/runner/work/spark/spark/target/tmp/spark-8ef51f34-b9de-48f2-b8df-07e14599b4c9/state/0/1 does not exist 2023-12-27T09:59:02.6367861Z at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:733) 2023-12-27T09:59:02.6369383Z at org.apache.hadoop.fs.DelegateToFileSystem.listStatus(DelegateToFileSystem.java:177) 2023-12-27T09:59:02.6370693Z at org.apache.hadoop.fs.ChecksumFs.listStatus(ChecksumFs.java:571) 2023-12-27T09:59:02.6371781Z at org.apache.hadoop.fs.FileContext$Util$1.next(FileContext.java:1940) 2023-12-27T09:59:02.6372876Z at org.apache.hadoop.fs.FileContext$Util$1.next(FileContext.java:1936) 2023-12-27T09:59:02.6373967Z at org.apache.hadoop.fs.FSLinkResolver.resolve(FSLinkResolver.java:90) 2023-12-27T09:59:02.6375104Z at org.apache.hadoop.fs.FileContext$Util.listStatus(FileContext.java:1942) 2023-12-27T09:59:02.6376676Z 09:59:02.636 WARN org.apache.spark.sql.execution.streaming.state.StateStore: Error running maintenance thread 2023-12-27T09:59:02.6379079Z java.io.FileNotFoundException: File file:/home/runner/work/spark/spark/target/tmp/spark-8ef51f34-b9de-48f2-b8df-07e14599b4c9/state/0/1 does not exist 2023-12-27T09:59:02.6381083Z at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:733) 2023-12-27T09:59:02.6382490Z at org.apache.hadoop.fs.DelegateToFileSystem.listStatus(DelegateToFileSystem.java:177) 2023-12-27T09:59:02.6383816Z at org.apache.hadoop.fs.ChecksumFs.listStatus(ChecksumFs.java:571) 2023-12-27T09:59:02.6384875Z at 
org.apache.hadoop.fs.FileContext$Util$1.next(FileContext.java:1940) 2023-12-27T09:59:02.6386294Z at org.apache.hadoop.fs.FileContext$Util$1.next(FileContext.java:1936) 2023-12-27T09:59:02.6387439Z at org.apache.hadoop.fs.FSLinkResolver.resolve(FSLinkResolver.java:90) 2023-12-27T09:59:02.6388674Z at org.apache.hadoop.fs.FileContext$Util.listStatus(FileContext.java:1942) ... 2023-12-27T10:01:02.4292831Z [info] - changing schema of state when restarting query - state format version 2 (RocksDBStateStore) *** FAILED *** (2 minutes) 2023-12-27T10:01:02.4295311Z [info]  Timed out waiting for stream: The code passed to failAfter did not complete within 120 seconds. 2023-12-27T10:01:02.4297271Z [info]  java.base/java.lang.Thread.getStackTrace(Thread.java:1619) 2023-12-27T10:01:02.4299084Z [info]  org.scalatest.concurrent.TimeLimits$.failAfterImpl(TimeLimits.scala:277) 2023-12-27T10:01:02.4300948Z [info]  org.scalatest.concurrent.TimeLimits.failAfter(TimeLimits.scala:231) ... 2023-12-27T10:01:02.6474472Z 10:01:02.646 WARN org.apache.spark.sql.execution.streaming.state.RocksDB StateStoreId(opId=0,partId=0,name=default): Error closing RocksDB 2023-12-27T10:01:02.6482792Z org.apache.spark.SparkException: [CANNOT_LOAD_STATE_STORE.UNRELEASED_THREAD_ERROR] An error occurred during loading state. StateStoreId(opId=0,partId=0,name=default): RocksDB instance could not be acquired by [ThreadId: Some(1858)] as it was not released by [ThreadId: Some(3835), task: partition 0.0 in stage 513.0, TID 1369] after 120009 ms. 2023-12-27T10:01:02.6488483Z Thread holding the lock has trace: app//org.apache.spark.sql.execution.streaming.state.StateStore$.getStateStoreProvider(StateStore.scala:577) 2023-12-27T10:01:02.6490896Z app//org.apache.spark.sql.execution.streaming.state.StateStore$.get(StateStore.scala:565) 2023-12-27T10:01:02.6493072Z app//org.apache.spark.sql.execution.streaming.state.StateStoreRDD.compute(StateStoreRDD.scala:128) 2023-12-27T10:01:02.6494915Z app//org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365) 2023-12-27T10:01:02.6496232Z app//org.apache.spark.rdd.RDD.iterator(RDD.scala:329) 2023-12-27T10:01:02.6497655Z app//org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) 2023-12-27T10:01:02.6499153Z app//org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365) 2023-12-27T10:01:02.6556758Z 10:01:02.654 WARN org.apache.spark.scheduler.TaskSetManager: Lost task 0.0 in stage 513.0 (TID 1369) (localhost executor driver): TaskKilled (Stage cancelled: [SPARK_JOB_CANCELLED] Job 260 cancelled part of cancelled job group cf26288c-0158-48ce-8a86-00a596dd45d8 SQLSTATE: XXKDA) ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing unit tests ``` [info] Run completed in 6 minutes, 20 seconds. [info] Total number of tests run: 80 [info] Suites: completed 1, aborted 0 [info] Tests: succeeded 80, failed 0, canceled 0, ignored 0, pending 0 [info] All tests passed. ``` ### Was this patch authored or co-authored using generative AI tooling? Yes Closes #44542 from anishshri-db/task/SPARK-46547. 
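The four-step interleaving described above is a classic lock-ordering deadlock: the task thread holds the store instance lock and then needs `loadedProviders`, while the maintenance thread holds `loadedProviders` and then needs the instance lock inside `close`. A minimal, hypothetical Scala sketch of that cycle follows; the lock objects and method bodies are illustrative stand-ins, not the actual `StateStore` internals.

```scala
// Illustrative sketch only; not the real StateStore code. Two plain JVM monitors
// stand in for the loadedProviders lock and the per-store RocksDB instance lock.
object MaintenanceDeadlockSketch {
  private val loadedProvidersLock = new Object // guards the map of loaded providers
  private val storeInstanceLock   = new Object // guards one store/DB instance

  // Task thread: StateStoreRestoreExec grabs the instance lock (step 1) ...
  def taskThread(): Unit = storeInstanceLock.synchronized {
    // ... then StateStoreSaveExec needs the providers lock (step 4) and blocks.
    loadedProvidersLock.synchronized { /* commit the store */ }
  }

  // Maintenance thread, after an error (step 2): grabs the providers lock (step 3) ...
  def maintenanceThread(): Unit = loadedProvidersLock.synchronized {
    // ... and closing each provider needs the instance lock, completing the cycle.
    storeInstanceLock.synchronized { /* close the provider */ }
  }
}
```

Swallowing the non-fatal error in `doMaintenance` (the one-hunk diff that follows) keeps a transient maintenance failure from propagating and triggering the close-all-providers path, which, per the description above, is what lets this cycle form while a task still holds a store instance.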
Authored-by: Anish Shrigondekar Signed-off-by: Jungtaek Lim (cherry picked from commit f7b0b453791707b904ed0fa5508aa4b648d56bba) Signed-off-by: Jungtaek Lim --- .../streaming/state/RocksDBStateStoreProvider.scala | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala index 10f207c7ec1fe..a19eb00a7b5ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBStateStoreProvider.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.streaming.state import java.io._ +import scala.util.control.NonFatal + import org.apache.hadoop.conf.Configuration import org.apache.spark.{SparkConf, SparkEnv} @@ -202,7 +204,15 @@ private[sql] class RocksDBStateStoreProvider } override def doMaintenance(): Unit = { - rocksDB.doMaintenance() + try { + rocksDB.doMaintenance() + } catch { + // SPARK-46547 - Swallow non-fatal exception in maintenance task to avoid deadlock between + // maintenance thread and streaming aggregation operator + case NonFatal(ex) => + logWarning(s"Ignoring error while performing maintenance operations with exception=", + ex) + } } override def close(): Unit = { From 8a0f64274f44dd17a3e1f034c9f1f20a61ff0549 Mon Sep 17 00:00:00 2001 From: Nikhil Sheoran <125331115+nikhilsheoran-db@users.noreply.github.com> Date: Fri, 12 Jan 2024 10:20:49 +0800 Subject: [PATCH 172/521] [SPARK-46640][SQL] Fix RemoveRedundantAlias by excluding subquery attributes - In `RemoveRedundantAliases`, we have an `excluded` AttributeSet argument denoting the references for which we should not remove aliases. For a query with subquery expressions, adding the attributes references by the subquery in the `excluded` set prevents rewrites that might remove presumedly redundant aliases. (Changes in RemoveRedundantAlias) - Added a configuration flag to disable this fix, if not needed. - Added a unit test with Filter exists subquery expression to show how the alias would have been removed. - `RemoveRedundantAliases` does not take into account the outer attributes of a `SubqueryExpression` when considering redundant aliases, potentially removing them if it thinks they are redundant. - This can cause scenarios where a subquery expression has conditions like `a#x = a#x` i.e. both the attribute names and the expression ID(s) are the same. This can then lead to conflicting expression ID(s) error. - For example, in the query example below, the `RemoveRedundantAliases` would remove the alias `a#0 as a#1` and replace `a#1` with `a#0` in the Filter exists subquery expression which would create an issue if the subquery expression had an attribute with reference `a#0` (possible due to different scan relation instances possibly having the same attribute ID(s) (Ref: #40662) ``` Filter exists [a#1 && (a#1 = b#2)] : +- LocalRelation , [b#2] +- Project [a#0 AS a#1] +- LocalRelation , [a#0] ``` becomes ``` Filter exists [a#0 && (a#0 = b#2)] : +- LocalRelation , [b#2] +- LocalRelation , [a#0] ``` - The changes are needed to fix this bug. No - Added a unit test with Filter exists subquery expression to show how the alias would have been removed. No Closes #44645 from nikhilsheoran-db/SPARK-46640. 
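Because the fix is guarded by the new internal flag added in the `SQLConf` hunk of the diff below, it can be toggled at runtime. A minimal sketch, assuming a `SparkSession` named `spark` (e.g. in `spark-shell`); the key string is the one introduced by this change:

```scala
// Hedged example: setting the flag to "false" falls back to the pre-SPARK-46640
// alias removal, e.g. to compare optimized plans. The flag is internal, so this
// is intended for debugging/triage only.
spark.conf.set(
  "spark.sql.optimizer.excludeSubqueryRefsFromRemoveRedundantAliases.enabled",
  "false")
```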
Authored-by: Nikhil Sheoran <125331115+nikhilsheoran-db@users.noreply.github.com> Signed-off-by: Wenchen Fan (cherry picked from commit bbeb8d7417bafa09ad5202347175a47b3217e27f) Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 12 ++++- .../apache/spark/sql/internal/SQLConf.scala | 9 ++++ .../RemoveRedundantAliasAndProjectSuite.scala | 48 +++++++++++++++++++ 3 files changed, 68 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index ec5f00d34cd8c..df17840d567e0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -576,10 +576,20 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { } case _ => + val subQueryAttributes = if (conf.getConf(SQLConf + .EXCLUDE_SUBQUERY_EXP_REFS_FROM_REMOVE_REDUNDANT_ALIASES)) { + // Collect the references for all the subquery expressions in the plan. + AttributeSet.fromAttributeSets(plan.expressions.collect { + case e: SubqueryExpression => e.references + }) + } else { + AttributeSet.empty + } + // Remove redundant aliases in the subtree(s). val currentNextAttrPairs = mutable.Buffer.empty[(Attribute, Attribute)] val newNode = plan.mapChildren { child => - val newChild = removeRedundantAliases(child, excluded) + val newChild = removeRedundantAliases(child, excluded ++ subQueryAttributes) currentNextAttrPairs ++= createAttributeMapping(child, newChild) newChild } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index de4a89667aff6..2e41374035c8c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -4352,6 +4352,15 @@ object SQLConf { .checkValue(_ >= 0, "The threshold of cached local relations must not be negative") .createWithDefault(64 * 1024 * 1024) + val EXCLUDE_SUBQUERY_EXP_REFS_FROM_REMOVE_REDUNDANT_ALIASES = + buildConf("spark.sql.optimizer.excludeSubqueryRefsFromRemoveRedundantAliases.enabled") + .internal() + .doc("When true, exclude the references from the subquery expressions (in, exists, etc.) " + + s"while removing redundant aliases.") + .version("4.0.0") + .booleanConf + .createWithDefault(true) + val LEGACY_PERCENTILE_DISC_CALCULATION = buildConf("spark.sql.legacy.percentileDiscCalculation") .internal() .doc("If true, the old bogus percentile_disc calculation is used. 
The old calculation " + diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala index cd19e5062ae1f..8a0a0466ca741 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/RemoveRedundantAliasAndProjectSuite.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.MetadataBuilder class RemoveRedundantAliasAndProjectSuite extends PlanTest { @@ -130,4 +131,51 @@ class RemoveRedundantAliasAndProjectSuite extends PlanTest { correlated = false) comparePlans(optimized, expected) } + + test("SPARK-46640: do not remove outer references from a subquery expression") { + val a = $"a".int + val a_alias = Alias(a, "a")() + val a_alias_attr = a_alias.toAttribute + val b = $"b".int + + // The original input query + // Filter exists [a#1 && (a#1 = b#2)] + // : +- LocalRelation , [b#2] + // +- Project [a#0 AS a#1] + // +- LocalRelation , [a#0] + val query = Filter( + Exists( + LocalRelation(b), + outerAttrs = Seq(a_alias_attr), + joinCond = Seq(EqualTo(a_alias_attr, b)) + ), + Project(Seq(a_alias), LocalRelation(a)) + ) + + // The alias would not be removed if excluding subquery references is enabled. + val expectedWhenExcluded = query + + // The alias would have been removed if excluding subquery references is disabled. + // Filter exists [a#0 && (a#0 = b#2)] + // : +- LocalRelation , [b#2] + // +- LocalRelation , [a#0] + val expectedWhenNotExcluded = Filter( + Exists( + LocalRelation(b), + outerAttrs = Seq(a), + joinCond = Seq(EqualTo(a, b)) + ), + LocalRelation(a) + ) + + withSQLConf(SQLConf.EXCLUDE_SUBQUERY_EXP_REFS_FROM_REMOVE_REDUNDANT_ALIASES.key -> "true") { + val optimized = Optimize.execute(query) + comparePlans(optimized, expectedWhenExcluded) + } + + withSQLConf(SQLConf.EXCLUDE_SUBQUERY_EXP_REFS_FROM_REMOVE_REDUNDANT_ALIASES.key -> "false") { + val optimized = Optimize.execute(query) + comparePlans(optimized, expectedWhenNotExcluded) + } + } } From d422aed4a2e82d671a592b096919015bddeb751f Mon Sep 17 00:00:00 2001 From: Takuya UESHIN Date: Fri, 12 Jan 2024 13:19:12 +0900 Subject: [PATCH 173/521] [SPARK-46684][PYTHON][CONNECT][3.5] Fix CoGroup.applyInPandas/Arrow to pass arguments properly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? This is a backport of apache/spark#44695. Fix `CoGroup.applyInPandas/Arrow` to pass arguments properly. ### Why are the changes needed? In Spark Connect, `CoGroup.applyInPandas/Arrow` doesn't take arguments properly, so the arguments of the UDF can be broken: ```py >>> import pandas as pd >>> >>> df1 = spark.createDataFrame( ... [(1, 1.0, "a"), (2, 2.0, "b"), (1, 3.0, "c"), (2, 4.0, "d")], ("id", "v1", "v2") ... ) >>> df2 = spark.createDataFrame([(1, "x"), (2, "y"), (1, "z")], ("id", "v3")) >>> >>> def summarize(left, right): ... return pd.DataFrame( ... { ... "left_rows": [len(left)], ... "left_columns": [len(left.columns)], ... "right_rows": [len(right)], ... "right_columns": [len(right.columns)], ... } ... ) ... 
>>> df = ( ... df1.groupby("id") ... .cogroup(df2.groupby("id")) ... .applyInPandas( ... summarize, ... schema="left_rows long, left_columns long, right_rows long, right_columns long", ... ) ... ) >>> >>> df.show() +---------+------------+----------+-------------+ |left_rows|left_columns|right_rows|right_columns| +---------+------------+----------+-------------+ | 2| 1| 2| 1| | 2| 1| 1| 1| +---------+------------+----------+-------------+ ``` The result should be: ```py +---------+------------+----------+-------------+ |left_rows|left_columns|right_rows|right_columns| +---------+------------+----------+-------------+ |        2|           3|         2|            2| |        2|           3|         1|            2| +---------+------------+----------+-------------+ ``` ### Does this PR introduce _any_ user-facing change? This is a bug fix. ### How was this patch tested? Added the related tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44696 from ueshin/issues/SPARK-46684/3.5/cogroup. Authored-by: Takuya UESHIN Signed-off-by: Hyukjin Kwon --- .../connect/planner/SparkConnectPlanner.scala | 32 ++++++++--------- python/pyspark/sql/dataframe.py | 8 +++-- .../tests/pandas/test_pandas_cogrouped_map.py | 35 +++++++++++++++++++ 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala index 50a55f5e6411d..709e0811e5de2 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala @@ -674,8 +674,6 @@ class SparkConnectPlanner(val sessionHolder: SessionHolder) extends Logging { transformTypedCoGroupMap(rel, commonUdf) case proto.CommonInlineUserDefinedFunction.FunctionCase.PYTHON_UDF => - val pythonUdf = transformPythonUDF(commonUdf) - val inputCols = rel.getInputGroupingExpressionsList.asScala.toSeq.map(expr => Column(transformExpression(expr))) @@ -690,6 +688,10 @@ class SparkConnectPlanner(val sessionHolder: SessionHolder) extends Logging { .ofRows(session, transformRelation(rel.getOther)) .groupBy(otherCols: _*) + val pythonUdf = createUserDefinedPythonFunction(commonUdf) + .builder(input.df.logicalPlan.output ++ other.df.logicalPlan.output) + .asInstanceOf[PythonUDF] + input.flatMapCoGroupsInPandas(other, pythonUdf).logicalPlan case _ => @@ -1587,17 +1589,23 @@ class SparkConnectPlanner(val sessionHolder: SessionHolder) extends Logging { private def transformPythonFuncExpression( fun: proto.CommonInlineUserDefinedFunction): Expression = { + createUserDefinedPythonFunction(fun) + .builder(fun.getArgumentsList.asScala.map(transformExpression).toSeq) match { + case udaf: PythonUDAF => udaf.toAggregateExpression() + case other => other + } + } + + private def createUserDefinedPythonFunction( + fun: proto.CommonInlineUserDefinedFunction): UserDefinedPythonFunction = { val udf = fun.getPythonUdf + val function = transformPythonFunction(udf) UserDefinedPythonFunction( name = fun.getFunctionName, - func = transformPythonFunction(udf), + func = function, dataType = transformDataType(udf.getOutputType), pythonEvalType = udf.getEvalType, udfDeterministic = fun.getDeterministic) - .builder(fun.getArgumentsList.asScala.map(transformExpression).toSeq) match { - case udaf: PythonUDAF => 
udaf.toAggregateExpression() - case other => other - } } private def transformPythonFunction(fun: proto.PythonUDF): SimplePythonFunction = { @@ -2584,15 +2592,7 @@ class SparkConnectPlanner(val sessionHolder: SessionHolder) extends Logging { } private def handleRegisterPythonUDF(fun: proto.CommonInlineUserDefinedFunction): Unit = { - val udf = fun.getPythonUdf - val function = transformPythonFunction(udf) - val udpf = UserDefinedPythonFunction( - name = fun.getFunctionName, - func = function, - dataType = transformDataType(udf.getOutputType), - pythonEvalType = udf.getEvalType, - udfDeterministic = fun.getDeterministic) - + val udpf = createUserDefinedPythonFunction(fun) session.udf.registerPython(fun.getFunctionName, udpf) } diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 5707ae2a31fec..7c382ab1c5a54 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -942,7 +942,11 @@ def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = age | 16 name | Bob """ + print(self._show_string(n, truncate, vertical)) + def _show_string( + self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = False + ) -> str: if not isinstance(n, int) or isinstance(n, bool): raise PySparkTypeError( error_class="NOT_INT", @@ -956,7 +960,7 @@ def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = ) if isinstance(truncate, bool) and truncate: - print(self._jdf.showString(n, 20, vertical)) + return self._jdf.showString(n, 20, vertical) else: try: int_truncate = int(truncate) @@ -969,7 +973,7 @@ def show(self, n: int = 20, truncate: Union[bool, int] = True, vertical: bool = }, ) - print(self._jdf.showString(n, int_truncate, vertical)) + return self._jdf.showString(n, int_truncate, vertical) def __repr__(self) -> str: if not self._support_repr_html and self.sparkSession._jconf.isReplEagerEvalEnabled(): diff --git a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py index b867156e71a5d..c3cd0f37b1038 100644 --- a/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py +++ b/python/pyspark/sql/tests/pandas/test_pandas_cogrouped_map.py @@ -445,6 +445,41 @@ def cogroup(left: pd.DataFrame, right: pd.DataFrame) -> pd.DataFrame: actual = df.orderBy("id", "day").take(days) self.assertEqual(actual, [Row(0, day, vals, vals) for day in range(days)]) + def test_with_local_data(self): + df1 = self.spark.createDataFrame( + [(1, 1.0, "a"), (2, 2.0, "b"), (1, 3.0, "c"), (2, 4.0, "d")], ("id", "v1", "v2") + ) + df2 = self.spark.createDataFrame([(1, "x"), (2, "y"), (1, "z")], ("id", "v3")) + + def summarize(left, right): + return pd.DataFrame( + { + "left_rows": [len(left)], + "left_columns": [len(left.columns)], + "right_rows": [len(right)], + "right_columns": [len(right.columns)], + } + ) + + df = ( + df1.groupby("id") + .cogroup(df2.groupby("id")) + .applyInPandas( + summarize, + schema="left_rows long, left_columns long, right_rows long, right_columns long", + ) + ) + + self.assertEqual( + df._show_string(), + "+---------+------------+----------+-------------+\n" + "|left_rows|left_columns|right_rows|right_columns|\n" + "+---------+------------+----------+-------------+\n" + "| 2| 3| 2| 2|\n" + "| 2| 3| 1| 2|\n" + "+---------+------------+----------+-------------+\n", + ) + @staticmethod def _test_with_key(left, right, isLeft): def right_assign_key(key, lft, rgt): From 2fe253e2f137dc901fd81a79c65019a8ea2312ed Mon Sep 17 00:00:00 2001 
From: Dongjoon Hyun Date: Fri, 12 Jan 2024 12:54:29 -0800 Subject: [PATCH 174/521] [SPARK-46704][CORE][UI] Fix `MasterPage` to sort `Running Drivers` table by `Duration` column correctly ### What changes were proposed in this pull request? This PR aims to fix `MasterPage` to sort `Running Drivers` table by `Duration` column correctly. ### Why are the changes needed? Since Apache Spark 3.0.0, `MasterPage` shows `Duration` column of `Running Drivers`. **BEFORE** **AFTER** ### Does this PR introduce _any_ user-facing change? Yes, this is a bug fix of UI. ### How was this patch tested? Manual. Run a Spark standalone cluster. ``` $ SPARK_MASTER_OPTS="-Dspark.master.rest.enabled=true -Dspark.deploy.maxDrivers=2" sbin/start-master.sh $ sbin/start-worker.sh spark://$(hostname):7077 ``` Submit multiple jobs via REST API. ``` $ curl -s -k -XPOST http://localhost:6066/v1/submissions/create \ --header "Content-Type:application/json;charset=UTF-8" \ --data '{ "appResource": "", "sparkProperties": { "spark.master": "spark://localhost:7077", "spark.app.name": "Test 1", "spark.submit.deployMode": "cluster", "spark.jars": "/Users/dongjoon/APACHE/spark-merge/examples/target/scala-2.13/jars/spark-examples_2.13-4.0.0-SNAPSHOT.jar" }, "clientSparkVersion": "", "mainClass": "org.apache.spark.examples.SparkPi", "environmentVariables": {}, "action": "CreateSubmissionRequest", "appArgs": [ "10000" ] }' ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44711 from dongjoon-hyun/SPARK-46704. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 25c680cfd4dc63aeb9d16a673ee431c57188b80d) Signed-off-by: Dongjoon Hyun --- .../scala/org/apache/spark/deploy/master/ui/MasterPage.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index e7e90aa0a37da..d8753a0ffcb59 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -354,7 +354,9 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") {


{if (showDuration) { - + }} } From 679e4b6593a1c3aa439d9c8bb6237d203fba0e58 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Fri, 12 Jan 2024 13:49:24 -0800 Subject: [PATCH 175/521] [SPARK-46700][CORE] Count the last spilling for the shuffle disk spilling bytes metric ### What changes were proposed in this pull request? This PR fixes a long-standing bug in ShuffleExternalSorter about the "spilled disk bytes" metrics. When we close the sorter, we will spill the remaining data in the buffer, with a flag `isLastFile = true`. This flag means the spilling will not increase the "spilled disk bytes" metrics. This makes sense if the sorter has never spilled before, then the final spill file will be used as the final shuffle output file, and we should keep the "spilled disk bytes" metrics as 0. However, if spilling did happen before, then we simply miscount the final spill file for the "spilled disk bytes" metrics today. This PR fixes this issue, by setting that flag when closing the sorter only if this is the first spilling. ### Why are the changes needed? make metrics accurate ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? updated tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #44709 from cloud-fan/shuffle. Authored-by: Wenchen Fan Signed-off-by: Dongjoon Hyun (cherry picked from commit 4ea374257c1fdb276abcd6b953ba042593e4d5a3) Signed-off-by: Dongjoon Hyun --- .../shuffle/sort/ShuffleExternalSorter.java | 34 +++++++++++-------- .../shuffle/sort/UnsafeShuffleWriter.java | 6 ---- .../sort/UnsafeShuffleWriterSuite.java | 20 +++++++---- 3 files changed, 33 insertions(+), 27 deletions(-) diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java index a82f691d085d4..b097089282ce3 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/ShuffleExternalSorter.java @@ -150,11 +150,21 @@ public long[] getChecksums() { * Sorts the in-memory records and writes the sorted records to an on-disk file. * This method does not free the sort data structures. * - * @param isLastFile if true, this indicates that we're writing the final output file and that the - * bytes written should be counted towards shuffle spill metrics rather than - * shuffle write metrics. + * @param isFinalFile if true, this indicates that we're writing the final output file and that + * the bytes written should be counted towards shuffle write metrics rather + * than shuffle spill metrics. */ - private void writeSortedFile(boolean isLastFile) { + private void writeSortedFile(boolean isFinalFile) { + // Only emit the log if this is an actual spilling. + if (!isFinalFile) { + logger.info( + "Task {} on Thread {} spilling sort data of {} to disk ({} {} so far)", + taskContext.taskAttemptId(), + Thread.currentThread().getId(), + Utils.bytesToString(getMemoryUsage()), + spills.size(), + spills.size() != 1 ? " times" : " time"); + } // This call performs the actual sort. final ShuffleInMemorySorter.ShuffleSorterIterator sortedRecords = @@ -167,13 +177,14 @@ private void writeSortedFile(boolean isLastFile) { final ShuffleWriteMetricsReporter writeMetricsToUse; - if (isLastFile) { + if (isFinalFile) { // We're writing the final non-spill file, so we _do_ want to count this as shuffle bytes. 
writeMetricsToUse = writeMetrics; } else { // We're spilling, so bytes written should be counted towards spill rather than write. // Create a dummy WriteMetrics object to absorb these metrics, since we don't want to count // them towards shuffle bytes written. + // The actual shuffle bytes written will be counted when we merge the spill files. writeMetricsToUse = new ShuffleWriteMetrics(); } @@ -246,7 +257,7 @@ private void writeSortedFile(boolean isLastFile) { spills.add(spillInfo); } - if (!isLastFile) { // i.e. this is a spill file + if (!isFinalFile) { // i.e. this is a spill file // The current semantics of `shuffleRecordsWritten` seem to be that it's updated when records // are written to disk, not when they enter the shuffle sorting code. DiskBlockObjectWriter // relies on its `recordWritten()` method being called in order to trigger periodic updates to @@ -281,12 +292,6 @@ public long spill(long size, MemoryConsumer trigger) throws IOException { return 0L; } - logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", - Thread.currentThread().getId(), - Utils.bytesToString(getMemoryUsage()), - spills.size(), - spills.size() > 1 ? " times" : " time"); - writeSortedFile(false); final long spillSize = freeMemory(); inMemSorter.reset(); @@ -440,8 +445,9 @@ public void insertRecord(Object recordBase, long recordOffset, int length, int p */ public SpillInfo[] closeAndGetSpills() throws IOException { if (inMemSorter != null) { - // Do not count the final file towards the spill count. - writeSortedFile(true); + // Here we are spilling the remaining data in the buffer. If there is no spill before, this + // final spill file will be the final shuffle output file. + writeSortedFile(/* isFinalFile = */spills.isEmpty()); freeMemory(); inMemSorter.free(); inMemSorter = null; diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java index 9c54184105951..d5b4eb138b1a6 100644 --- a/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java +++ b/core/src/main/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriter.java @@ -327,12 +327,6 @@ private long[] mergeSpillsUsingStandardWriter(SpillInfo[] spills) throws IOExcep logger.debug("Using slow merge"); mergeSpillsWithFileStream(spills, mapWriter, compressionCodec); } - // When closing an UnsafeShuffleExternalSorter that has already spilled once but also has - // in-memory records, we write out the in-memory records to a file but do not count that - // final write as bytes spilled (instead, it's accounted as shuffle write). The merge needs - // to be counted as shuffle write, but this will lead to double-counting of the final - // SpillInfo's bytes. 
- writeMetrics.decBytesWritten(spills[spills.length - 1].file.length()); partitionLengths = mapWriter.commitAllPartitions(sorter.getChecksums()).getPartitionLengths(); } catch (Exception e) { try { diff --git a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java index d3aa93549a83a..1fa17b908699f 100644 --- a/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java +++ b/core/src/test/java/org/apache/spark/shuffle/sort/UnsafeShuffleWriterSuite.java @@ -69,6 +69,7 @@ public class UnsafeShuffleWriterSuite implements ShuffleChecksumTestHelper { File tempDir; long[] partitionSizesInMergedFile; final LinkedList spillFilesCreated = new LinkedList<>(); + long totalSpilledDiskBytes = 0; SparkConf conf; final Serializer serializer = new KryoSerializer(new SparkConf().set("spark.kryo.unsafe", "false")); @@ -96,6 +97,7 @@ public void setUp() throws Exception { mergedOutputFile = File.createTempFile("mergedoutput", "", tempDir); partitionSizesInMergedFile = null; spillFilesCreated.clear(); + totalSpilledDiskBytes = 0; conf = new SparkConf() .set(package$.MODULE$.BUFFER_PAGESIZE().key(), "1m") .set(package$.MODULE$.MEMORY_OFFHEAP_ENABLED(), false) @@ -160,7 +162,11 @@ public void setUp() throws Exception { when(diskBlockManager.createTempShuffleBlock()).thenAnswer(invocationOnMock -> { TempShuffleBlockId blockId = new TempShuffleBlockId(UUID.randomUUID()); - File file = File.createTempFile("spillFile", ".spill", tempDir); + File file = spy(File.createTempFile("spillFile", ".spill", tempDir)); + when(file.delete()).thenAnswer(inv -> { + totalSpilledDiskBytes += file.length(); + return inv.callRealMethod(); + }); spillFilesCreated.add(file); return Tuple2$.MODULE$.apply(blockId, file); }); @@ -284,6 +290,9 @@ public void writeWithoutSpilling() throws Exception { final Option mapStatus = writer.stop(true); assertTrue(mapStatus.isDefined()); assertTrue(mergedOutputFile.exists()); + // Even if there is no spill, the sorter still writes its data to a spill file at the end, + // which will become the final shuffle file. 
+ assertEquals(1, spillFilesCreated.size()); long sumOfPartitionSizes = 0; for (long size: partitionSizesInMergedFile) { @@ -425,9 +434,8 @@ private void testMergingSpills( assertSpillFilesWereCleanedUp(); ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics(); assertEquals(dataToWrite.size(), shuffleWriteMetrics.recordsWritten()); - assertTrue(taskMetrics.diskBytesSpilled() > 0L); - assertTrue(taskMetrics.diskBytesSpilled() < mergedOutputFile.length()); assertTrue(taskMetrics.memoryBytesSpilled() > 0L); + assertEquals(totalSpilledDiskBytes, taskMetrics.diskBytesSpilled()); assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.bytesWritten()); } @@ -517,9 +525,8 @@ public void writeEnoughDataToTriggerSpill() throws Exception { assertSpillFilesWereCleanedUp(); ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics(); assertEquals(dataToWrite.size(), shuffleWriteMetrics.recordsWritten()); - assertTrue(taskMetrics.diskBytesSpilled() > 0L); - assertTrue(taskMetrics.diskBytesSpilled() < mergedOutputFile.length()); assertTrue(taskMetrics.memoryBytesSpilled()> 0L); + assertEquals(totalSpilledDiskBytes, taskMetrics.diskBytesSpilled()); assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.bytesWritten()); } @@ -550,9 +557,8 @@ private void writeEnoughRecordsToTriggerSortBufferExpansionAndSpill() throws Exc assertSpillFilesWereCleanedUp(); ShuffleWriteMetrics shuffleWriteMetrics = taskMetrics.shuffleWriteMetrics(); assertEquals(dataToWrite.size(), shuffleWriteMetrics.recordsWritten()); - assertTrue(taskMetrics.diskBytesSpilled() > 0L); - assertTrue(taskMetrics.diskBytesSpilled() < mergedOutputFile.length()); assertTrue(taskMetrics.memoryBytesSpilled()> 0L); + assertEquals(totalSpilledDiskBytes, taskMetrics.diskBytesSpilled()); assertEquals(mergedOutputFile.length(), shuffleWriteMetrics.bytesWritten()); } From 10d5d8956b4cc22f17b1752dc91766398e1540ee Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Tue, 16 Jan 2024 14:16:03 +0800 Subject: [PATCH 176/521] [SPARK-46715][INFRA][3.5] Pin `sphinxcontrib-*` ### What changes were proposed in this pull request? backport https://github.com/apache/spark/pull/44727 to branch-3.5 ### Why are the changes needed? to restore doc build ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? ci ### Was this patch authored or co-authored using generative AI tooling? no Closes #44744 from zhengruifeng/infra_pin_shinxcontrib. Authored-by: Ruifeng Zheng Signed-off-by: Ruifeng Zheng --- .github/workflows/build_and_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index 32f6a44102bf9..b0b72a0d1a689 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -678,7 +678,7 @@ jobs: # See also https://issues.apache.org/jira/browse/SPARK-35375. # Pin the MarkupSafe to 2.0.1 to resolve the CI error. # See also https://issues.apache.org/jira/browse/SPARK-38279. 
- python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme 'sphinx-copybutton==0.5.2' nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' + python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme 'sphinx-copybutton==0.5.2' nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' 'nest-asyncio==1.5.8' 'rpds-py==0.16.2' 'alabaster==0.7.13' python3.9 -m pip install ipython_genutils # See SPARK-38517 python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 From 5a0bc96d5f5e42fa5e6ea9d024da572343f239a9 Mon Sep 17 00:00:00 2001 From: xieshuaihu Date: Wed, 17 Jan 2024 15:24:58 +0900 Subject: [PATCH 177/521] [SPARK-46732][CONNECT][3.5] Make Subquery/Broadcast thread work with Connect's artifact management ### What changes were proposed in this pull request? Similar with SPARK-44794, propagate JobArtifactState to broadcast/subquery thread. This is an example: ```scala val add1 = udf((i: Long) => i + 1) val tableA = spark.range(2).alias("a") val tableB = broadcast(spark.range(2).select(add1(col("id")).alias("id"))).alias("b") tableA.join(tableB). where(col("a.id")===col("b.id")). select(col("a.id").alias("a_id"), col("b.id").alias("b_id")). collect(). mkString("[", ", ", "]") ``` Before this pr, this example will throw exception `ClassNotFoundException`. Subquery and Broadcast execution use a separate ThreadPool which don't have the `JobArtifactState`. ### Why are the changes needed? Fix bug. Make Subquery/Broadcast thread work with Connect's artifact management. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add a new test to `ReplE2ESuite` ### Was this patch authored or co-authored using generative AI tooling? No Closes #44763 from xieshuaihu/SPARK-46732backport. Authored-by: xieshuaihu Signed-off-by: Hyukjin Kwon --- .../spark/sql/application/ReplE2ESuite.scala | 16 ++++++++++++++++ .../spark/sql/execution/SQLExecution.scala | 5 +++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala index 5bb8cbf3543b0..9d61b4d56e1ed 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/application/ReplE2ESuite.scala @@ -362,4 +362,20 @@ class ReplE2ESuite extends RemoteSparkSession with BeforeAndAfterEach { val output = runCommandsInShell(input) assertContains("noException: Boolean = true", output) } + + test("broadcast works with REPL generated code") { + val input = + """ + |val add1 = udf((i: Long) => i + 1) + |val tableA = spark.range(2).alias("a") + |val tableB = broadcast(spark.range(2).select(add1(col("id")).alias("id"))).alias("b") + |tableA.join(tableB). + | where(col("a.id")===col("b.id")). + | select(col("a.id").alias("a_id"), col("b.id").alias("b_id")). + | collect(). 
+ | mkString("[", ", ", "]") + |""".stripMargin + val output = runCommandsInShell(input) + assertContains("""String = "[[1,1]]"""", output) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala index daeac699c2791..b4cbb61352235 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SQLExecution.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution import java.util.concurrent.{ConcurrentHashMap, ExecutorService, Future => JFuture} import java.util.concurrent.atomic.AtomicLong -import org.apache.spark.{ErrorMessageFormat, SparkContext, SparkThrowable, SparkThrowableHelper} +import org.apache.spark.{ErrorMessageFormat, JobArtifactSet, SparkContext, SparkThrowable, SparkThrowableHelper} import org.apache.spark.internal.config.{SPARK_DRIVER_PREFIX, SPARK_EXECUTOR_PREFIX} import org.apache.spark.internal.config.Tests.IS_TESTING import org.apache.spark.sql.SparkSession @@ -215,7 +215,8 @@ object SQLExecution { val activeSession = sparkSession val sc = sparkSession.sparkContext val localProps = Utils.cloneProperties(sc.getLocalProperties) - exec.submit(() => { + val artifactState = JobArtifactSet.getCurrentJobArtifactState.orNull + exec.submit(() => JobArtifactSet.withActiveJobArtifactState(artifactState) { val originalSession = SparkSession.getActiveSession val originalLocalProps = sc.getLocalProperties SparkSession.setActiveSession(activeSession) From d083da76b4d6b4f1351f2b4597840e2cc1a8683a Mon Sep 17 00:00:00 2001 From: Xinrong Meng Date: Thu, 18 Jan 2024 09:01:05 +0900 Subject: [PATCH 178/521] [SPARK-46663][PYTHON][3.5] Disable memory profiler for pandas UDFs with iterators ### What changes were proposed in this pull request? When using pandas UDFs with iterators, if users enable the profiling spark conf, a warning indicating non-support should be raised, and profiling should be disabled. However, currently, after raising the not-supported warning, the memory profiler is still being enabled. The PR proposed to fix that. ### Why are the changes needed? A bug fix to eliminate misleading behavior. ### Does this PR introduce _any_ user-facing change? The noticeable changes will affect only those using the PySpark shell. This is because, in the PySpark shell, the memory profiler will raise an error, which in turn blocks the execution of the UDF. ### How was this patch tested? Manual test. ### Was this patch authored or co-authored using generative AI tooling? Setup: ```py $ ./bin/pyspark --conf spark.python.profile=true >>> from typing import Iterator >>> from pyspark.sql.functions import * >>> import pandas as pd >>> pandas_udf("long") ... def plus_one(iterator: Iterator[pd.Series]) -> Iterator[pd.Series]: ... for s in iterator: ... yield s + 1 ... >>> df = spark.createDataFrame(pd.DataFrame([1, 2, 3], columns=["v"])) ``` Before: ``` >>> df.select(plus_one(df.v)).show() UserWarning: Profiling UDFs with iterators input/output is not supported. Traceback (most recent call last): ... OSError: could not get source code ``` After: ``` >>> df.select(plus_one(df.v)).show() /Users/xinrong.meng/spark/python/pyspark/sql/udf.py:417: UserWarning: Profiling UDFs with iterators input/output is not supported. +-----------+ |plus_one(v)| +-----------+ | 2| | 3| | 4| +-----------+ ``` Closes #44760 from xinrong-meng/PR_TOOL_PICK_PR_44668_BRANCH-3.5. 
Authored-by: Xinrong Meng Signed-off-by: Hyukjin Kwon --- python/pyspark/sql/tests/test_udf_profiler.py | 53 ++++++++++++++++++- python/pyspark/sql/udf.py | 32 ++++++----- 2 files changed, 67 insertions(+), 18 deletions(-) diff --git a/python/pyspark/sql/tests/test_udf_profiler.py b/python/pyspark/sql/tests/test_udf_profiler.py index 136f423d0a35c..019e502ec67cf 100644 --- a/python/pyspark/sql/tests/test_udf_profiler.py +++ b/python/pyspark/sql/tests/test_udf_profiler.py @@ -19,11 +19,19 @@ import unittest import os import sys +import warnings from io import StringIO +from typing import Iterator from pyspark import SparkConf from pyspark.sql import SparkSession -from pyspark.sql.functions import udf +from pyspark.sql.functions import udf, pandas_udf +from pyspark.testing.sqlutils import ( + have_pandas, + have_pyarrow, + pandas_requirement_message, + pyarrow_requirement_message, +) from pyspark.profiler import UDFBasicProfiler @@ -101,6 +109,49 @@ def add2(x): df = self.spark.range(10) df.select(add1("id"), add2("id"), add1("id")).collect() + # Unsupported + def exec_pandas_udf_iter_to_iter(self): + import pandas as pd + + @pandas_udf("int") + def iter_to_iter(batch_ser: Iterator[pd.Series]) -> Iterator[pd.Series]: + for ser in batch_ser: + yield ser + 1 + + self.spark.range(10).select(iter_to_iter("id")).collect() + + # Unsupported + def exec_map(self): + import pandas as pd + + def map(pdfs: Iterator[pd.DataFrame]) -> Iterator[pd.DataFrame]: + for pdf in pdfs: + yield pdf[pdf.id == 1] + + df = self.spark.createDataFrame([(1, 1.0), (1, 2.0), (2, 3.0), (2, 5.0)], ("id", "v")) + df.mapInPandas(map, schema=df.schema).collect() + + @unittest.skipIf(not have_pandas, pandas_requirement_message) # type: ignore + @unittest.skipIf(not have_pyarrow, pyarrow_requirement_message) # type: ignore + def test_unsupported(self): + with warnings.catch_warnings(record=True) as warns: + warnings.simplefilter("always") + self.exec_pandas_udf_iter_to_iter() + user_warns = [warn.message for warn in warns if isinstance(warn.message, UserWarning)] + self.assertTrue(len(user_warns) > 0) + self.assertTrue( + "Profiling UDFs with iterators input/output is not supported" in str(user_warns[0]) + ) + + with warnings.catch_warnings(record=True) as warns: + warnings.simplefilter("always") + self.exec_map() + user_warns = [warn.message for warn in warns if isinstance(warn.message, UserWarning)] + self.assertTrue(len(user_warns) > 0) + self.assertTrue( + "Profiling UDFs with iterators input/output is not supported" in str(user_warns[0]) + ) + if __name__ == "__main__": from pyspark.sql.tests.test_udf_profiler import * # noqa: F401 diff --git a/python/pyspark/sql/udf.py b/python/pyspark/sql/udf.py index 7d7784dd5226d..bdd3aba502b89 100644 --- a/python/pyspark/sql/udf.py +++ b/python/pyspark/sql/udf.py @@ -28,7 +28,6 @@ from py4j.java_gateway import JavaObject from pyspark import SparkContext -from pyspark.profiler import Profiler from pyspark.rdd import _prepare_for_python_RDD, PythonEvalType from pyspark.sql.column import Column, _to_java_column, _to_java_expr, _to_seq from pyspark.sql.types import ( @@ -338,24 +337,23 @@ def _create_judf(self, func: Callable[..., Any]) -> JavaObject: def __call__(self, *cols: "ColumnOrName") -> Column: sc = get_active_spark_context() - profiler: Optional[Profiler] = None - memory_profiler: Optional[Profiler] = None - if sc.profiler_collector: - profiler_enabled = sc._conf.get("spark.python.profile", "false") == "true" - memory_profiler_enabled = sc._conf.get("spark.python.profile.memory", 
"false") == "true" + profiler_enabled = sc._conf.get("spark.python.profile", "false") == "true" + memory_profiler_enabled = sc._conf.get("spark.python.profile.memory", "false") == "true" + if profiler_enabled or memory_profiler_enabled: # Disable profiling Pandas UDFs with iterators as input/output. - if profiler_enabled or memory_profiler_enabled: - if self.evalType in [ - PythonEvalType.SQL_SCALAR_PANDAS_ITER_UDF, - PythonEvalType.SQL_MAP_PANDAS_ITER_UDF, - PythonEvalType.SQL_MAP_ARROW_ITER_UDF, - ]: - profiler_enabled = memory_profiler_enabled = False - warnings.warn( - "Profiling UDFs with iterators input/output is not supported.", - UserWarning, - ) + if self.evalType in [ + PythonEvalType.SQL_SCALAR_PANDAS_ITER_UDF, + PythonEvalType.SQL_MAP_PANDAS_ITER_UDF, + PythonEvalType.SQL_MAP_ARROW_ITER_UDF, + ]: + warnings.warn( + "Profiling UDFs with iterators input/output is not supported.", + UserWarning, + ) + judf = self._judf + jPythonUDF = judf.apply(_to_seq(sc, cols, _to_java_column)) + return Column(jPythonUDF) # Disallow enabling two profilers at the same time. if profiler_enabled and memory_profiler_enabled: From b27d169c85e99ceffb20a6df8d10340749ab2129 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Thu, 18 Jan 2024 08:19:52 -0800 Subject: [PATCH 179/521] [MINOR][DOCS] Add zstandard as a candidate to fix the desc of spark.sql.avro.compression.codec ### What changes were proposed in this pull request? Add zstandard as a candidate to fix the desc of spark.sql.avro.compression.codec ### Why are the changes needed? docfix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? doc build ### Was this patch authored or co-authored using generative AI tooling? no Closes #44783 from yaooqinn/avro_minor. Authored-by: Kent Yao Signed-off-by: Dongjoon Hyun (cherry picked from commit c040824fd75c955dbc8e5712bc473a0ddb9a8c0f) Signed-off-by: Dongjoon Hyun --- docs/sql-data-sources-avro.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index c846116ebf3e3..1da75e5d7b17e 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -348,7 +348,7 @@ Configuration of Avro can be done using the `setConf` method on SparkSession or From fa6bf22112b4300dae1e7617f1480c0d12124b90 Mon Sep 17 00:00:00 2001 From: Jungtaek Lim Date: Fri, 19 Jan 2024 11:38:53 +0900 Subject: [PATCH 180/521] [SPARK-46676][SS] dropDuplicatesWithinWatermark should not fail on canonicalization of the plan ### What changes were proposed in this pull request? This PR proposes to fix the bug on canonicalizing the plan which contains the physical node of dropDuplicatesWithinWatermark (`StreamingDeduplicateWithinWatermarkExec`). ### Why are the changes needed? Canonicalization of the plan will replace the expressions (including attributes) to remove out cosmetic, including name, "and metadata", which denotes the event time column marker. StreamingDeduplicateWithinWatermarkExec assumes that the input attributes of child node contain the event time column, and it is determined at the initialization of the node instance. Once canonicalization is being triggered, child node will lose the notion of event time column from its attributes, and copy of StreamingDeduplicateWithinWatermarkExec will be performed which instantiating a new node of `StreamingDeduplicateWithinWatermarkExec` with new child node, which no longer has an event time column, hence instantiation will fail. 
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? New UT added. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44688 from HeartSaVioR/SPARK-46676. Authored-by: Jungtaek Lim Signed-off-by: Jungtaek Lim (cherry picked from commit c1ed3e60e67f53bb323e2b9fa47789fcde70a75a) Signed-off-by: Jungtaek Lim --- .../streaming/statefulOperators.scala | 10 ++++++--- ...ingDeduplicationWithinWatermarkSuite.scala | 21 +++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala index b31f6151fce23..b597c9723f5cf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/statefulOperators.scala @@ -1037,10 +1037,14 @@ case class StreamingDeduplicateWithinWatermarkExec( protected val extraOptionOnStateStore: Map[String, String] = Map.empty - private val eventTimeCol: Attribute = WatermarkSupport.findEventTimeColumn(child.output, + // Below three variables are defined as lazy, as evaluating these variables does not work with + // canonicalized plan. Specifically, attributes in child won't have an event time column in + // the canonicalized plan. These variables are NOT referenced in canonicalized plan, hence + // defining these variables as lazy would avoid such error. + private lazy val eventTimeCol: Attribute = WatermarkSupport.findEventTimeColumn(child.output, allowMultipleEventTimeColumns = false).get - private val delayThresholdMs = eventTimeCol.metadata.getLong(EventTimeWatermark.delayKey) - private val eventTimeColOrdinal: Int = child.output.indexOf(eventTimeCol) + private lazy val delayThresholdMs = eventTimeCol.metadata.getLong(EventTimeWatermark.delayKey) + private lazy val eventTimeColOrdinal: Int = child.output.indexOf(eventTimeCol) protected def initializeReusedDupInfoRow(): Option[UnsafeRow] = { val timeoutToUnsafeRow = UnsafeProjection.create(schemaForValueRow) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala index 595fc1cb9cea8..9a02ab3df7dd4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/StreamingDeduplicationWithinWatermarkSuite.scala @@ -199,4 +199,25 @@ class StreamingDeduplicationWithinWatermarkSuite extends StateStoreMetricsTest { ) } } + + test("SPARK-46676: canonicalization of StreamingDeduplicateWithinWatermarkExec should work") { + withTempDir { checkpoint => + val dedupeInputData = MemoryStream[(String, Int)] + val dedupe = dedupeInputData.toDS() + .withColumn("eventTime", timestamp_seconds($"_2")) + .withWatermark("eventTime", "10 second") + .dropDuplicatesWithinWatermark("_1") + .select($"_1", $"eventTime".cast("long").as[Long]) + + testStream(dedupe, Append)( + StartStream(checkpointLocation = checkpoint.getCanonicalPath), + AddData(dedupeInputData, "a" -> 1), + CheckNewAnswer("a" -> 1), + Execute { q => + // This threw out error before SPARK-46676. 
+ q.lastExecution.executedPlan.canonicalized + } + ) + } + } } From c19bf01b5208bb3aad0e6264b64597e0809b1efe Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sat, 20 Jan 2024 20:57:09 +0800 Subject: [PATCH 181/521] [SPARK-46769][SQL] Refine timestamp related schema inference This is a refinement of https://github.com/apache/spark/pull/43243 . This PR enforces one thing: we only infer TIMESTAMP NTZ type using NTZ parser, and only infer LTZ type using LTZ parser. This consistency is important to avoid nondeterministic behaviors. Avoid non-deterministic behaviors. After https://github.com/apache/spark/pull/43243 , we can still have inconsistency if the LEGACY mode is enabled. Yes for the legacy parser. Now it's more likely to infer string type instead of inferring timestamp type "by luck" existing tests no Closes https://github.com/apache/spark/pull/44789 Closes #44800 from cloud-fan/infer. Authored-by: Wenchen Fan Signed-off-by: Wenchen Fan (cherry picked from commit e4e40762ca41931646b8f201028b1f2298252d96) Signed-off-by: Wenchen Fan --- .../sql/catalyst/csv/CSVInferSchema.scala | 18 ++++---- .../sql/catalyst/json/JsonInferSchema.scala | 32 ++++++++++---- .../execution/datasources/csv/CSVSuite.scala | 42 +++++++++---------- 3 files changed, 55 insertions(+), 37 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala index ec01b56f9eb7c..2c27da3cf6e15 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.expressions.ExprUtils import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter} import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT import org.apache.spark.sql.errors.QueryExecutionErrors -import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ class CSVInferSchema(val options: CSVOptions) extends Serializable { @@ -66,6 +66,8 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { private val LENIENT_TS_FORMATTER_SUPPORTED_DATE_FORMATS = Set( "yyyy-MM-dd", "yyyy-M-d", "yyyy-M-dd", "yyyy-MM-d", "yyyy-MM", "yyyy-M", "yyyy") + private val isDefaultNTZ = SQLConf.get.timestampType == TimestampNTZType + /** * Similar to the JSON schema inference * 1. Infer type of each row @@ -199,14 +201,12 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { } private def tryParseTimestampNTZ(field: String): DataType = { - // We can only parse the value as TimestampNTZType if it does not have zone-offset or - // time-zone component and can be parsed with the timestamp formatter. - // Otherwise, it is likely to be a timestamp with timezone. - val timestampType = SQLConf.get.timestampType - if ((SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY || - timestampType == TimestampNTZType) && - timestampNTZFormatter.parseWithoutTimeZoneOptional(field, false).isDefined) { - timestampType + // For text-based format, it's ambiguous to infer a timestamp string without timezone, as it can + // be both TIMESTAMP LTZ and NTZ. To avoid behavior changes with the new support of NTZ, here + // we only try to infer NTZ if the config is set to use NTZ by default. 
+ if (isDefaultNTZ && + timestampNTZFormatter.parseWithoutTimeZoneOptional(field, false).isDefined) { + TimestampNTZType } else { tryParseTimestamp(field) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala index 4123c5290b6a1..f6d32f39f64ee 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JsonInferSchema.scala @@ -34,6 +34,7 @@ import org.apache.spark.sql.catalyst.util.LegacyDateFormats.FAST_DATE_FORMAT import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf} import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { @@ -53,6 +54,9 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { isParsing = true, forTimestampNTZ = true) + private val isDefaultNTZ = SQLConf.get.timestampType == TimestampNTZType + private val legacyMode = SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY + private def handleJsonErrorsByParseMode(parseMode: ParseMode, columnNameOfCorruptRecord: String, e: Throwable): Option[StructType] = { parseMode match { @@ -148,16 +152,30 @@ private[sql] class JsonInferSchema(options: JSONOptions) extends Serializable { val bigDecimal = decimalParser(field) DecimalType(bigDecimal.precision, bigDecimal.scale) } - val timestampType = SQLConf.get.timestampType if (options.prefersDecimal && decimalTry.isDefined) { decimalTry.get - } else if (options.inferTimestamp && (SQLConf.get.legacyTimeParserPolicy == - LegacyBehaviorPolicy.LEGACY || timestampType == TimestampNTZType) && + } else if (options.inferTimestamp) { + // For text-based format, it's ambiguous to infer a timestamp string without timezone, as + // it can be both TIMESTAMP LTZ and NTZ. To avoid behavior changes with the new support + // of NTZ, here we only try to infer NTZ if the config is set to use NTZ by default. + if (isDefaultNTZ && timestampNTZFormatter.parseWithoutTimeZoneOptional(field, false).isDefined) { - timestampType - } else if (options.inferTimestamp && - timestampFormatter.parseOptional(field).isDefined) { - TimestampType + TimestampNTZType + } else if (timestampFormatter.parseOptional(field).isDefined) { + TimestampType + } else if (legacyMode) { + val utf8Value = UTF8String.fromString(field) + // There was a mistake that we use TIMESTAMP NTZ parser to infer LTZ type with legacy + // mode. The mistake makes it easier to infer TIMESTAMP LTZ type and we have to keep + // this behavior now. See SPARK-46769 for more details. 
+ if (SparkDateTimeUtils.stringToTimestampWithoutTimeZone(utf8Value, false).isDefined) { + TimestampType + } else { + StringType + } + } else { + StringType + } } else { StringType } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 3bd45ca0dcdb3..78266acfd7de9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -1105,10 +1105,12 @@ abstract class CSVSuite test("SPARK-37326: Timestamp type inference for a column with TIMESTAMP_NTZ values") { withTempPath { path => - val exp = spark.sql(""" - select timestamp_ntz'2020-12-12 12:12:12' as col0 union all - select timestamp_ntz'2020-12-12 12:12:12' as col0 - """) + val exp = spark.sql( + """ + |select * + |from values (timestamp_ntz'2020-12-12 12:12:12'), (timestamp_ntz'2020-12-12 12:12:12') + |as t(col0) + |""".stripMargin) exp.write.format("csv").option("header", "true").save(path.getAbsolutePath) @@ -1126,6 +1128,15 @@ abstract class CSVSuite if (timestampType == SQLConf.TimestampTypes.TIMESTAMP_NTZ.toString) { checkAnswer(res, exp) + } else if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) { + // When legacy parser is enabled, we can't parse the NTZ string to LTZ, and eventually + // infer string type. + val expected = spark.read + .format("csv") + .option("inferSchema", "false") + .option("header", "true") + .load(path.getAbsolutePath) + checkAnswer(res, expected) } else { checkAnswer( res, @@ -2862,13 +2873,12 @@ abstract class CSVSuite test("SPARK-40474: Infer schema for columns with a mix of dates and timestamp") { withTempPath { path => - Seq( - "1765-03-28", + val input = Seq( "1423-11-12T23:41:00", + "1765-03-28", "2016-01-28T20:00:00" - ).toDF() - .repartition(1) - .write.text(path.getAbsolutePath) + ).toDF().repartition(1) + input.write.text(path.getAbsolutePath) if (SQLConf.get.legacyTimeParserPolicy == LegacyBehaviorPolicy.LEGACY) { val options = Map( @@ -2879,12 +2889,7 @@ abstract class CSVSuite .format("csv") .options(options) .load(path.getAbsolutePath) - val expected = Seq( - Row(Timestamp.valueOf("1765-03-28 00:00:00.0")), - Row(Timestamp.valueOf("1423-11-12 23:41:00.0")), - Row(Timestamp.valueOf("2016-01-28 20:00:00.0")) - ) - checkAnswer(df, expected) + checkAnswer(df, input) } else { // When timestampFormat is specified, infer and parse the column as strings val options1 = Map( @@ -2895,12 +2900,7 @@ abstract class CSVSuite .format("csv") .options(options1) .load(path.getAbsolutePath) - val expected1 = Seq( - Row("1765-03-28"), - Row("1423-11-12T23:41:00"), - Row("2016-01-28T20:00:00") - ) - checkAnswer(df1, expected1) + checkAnswer(df1, input) // When timestampFormat is not specified, infer and parse the column as // timestamp type if possible From b98cf95896a2b14e3692f1e7d58660583a2bb175 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 20 Jan 2024 17:51:12 -0800 Subject: [PATCH 182/521] [SPARK-46786][K8S] Fix `MountVolumesFeatureStep` to use `ReadWriteOncePod` instead of `ReadWriteOnce` This PR aims to fix a duplicated volume mounting bug by using `ReadWriteOncePod` instead of `ReadWriteOnce`. This bug fix is based on the stable K8s feature which is available since v1.22. 
- [KEP-2485: ReadWriteOncePod PersistentVolume AccessMode](https://github.com/kubernetes/enhancements/blob/master/keps/sig-storage/2485-read-write-once-pod-pv-access-mode/README.md) - https://kubernetes.io/docs/concepts/storage/persistent-volumes/#access-modes - v1.22 Alpha - v1.27 Beta - v1.29 Stable For the record, the minimum K8s version of GKE/EKS/AKE is **v1.24** as of today and the latest v1.29 is supported like the following. - [2024.01 (GKE Regular Channel)](https://cloud.google.com/kubernetes-engine/docs/release-schedule) - [2024.02 (AKE GA)](https://learn.microsoft.com/en-us/azure/aks/supported-kubernetes-versions?tabs=azure-cli#aks-kubernetes-release-calendar) This is a bug fix. Pass the CIs with the existing PV-related tests. No. Closes #44817 from dongjoon-hyun/SPARK-46786. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 45ec74415a4a89851968941b80c490e37ee88daf) Signed-off-by: Dongjoon Hyun --- .../spark/deploy/k8s/features/MountVolumesFeatureStep.scala | 2 +- .../apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala index 78dd6ec21ed34..cbbbb9c0bdf57 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/features/MountVolumesFeatureStep.scala @@ -126,5 +126,5 @@ private[spark] object MountVolumesFeatureStep { val PVC_ON_DEMAND = "OnDemand" val PVC = "PersistentVolumeClaim" val PVC_POSTFIX = "-pvc" - val PVC_ACCESS_MODE = "ReadWriteOnce" + val PVC_ACCESS_MODE = "ReadWriteOncePod" } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala index 1d373f3f8066e..f8e76012638cf 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala @@ -166,7 +166,7 @@ private[spark] trait PVTestsSuite { k8sSuite: KubernetesSuite => } } - test("PVs with local hostpath and storageClass on statefulsets", k8sTestTag, MinikubeTag) { + ignore("PVs with local hostpath and storageClass on statefulsets", k8sTestTag, MinikubeTag) { sparkAppConf .set(s"spark.kubernetes.driver.volumes.persistentVolumeClaim.data.mount.path", CONTAINER_MOUNT_PATH) From 687c2979959f13ac2c32e596f745f79284144735 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 20 Jan 2024 18:53:21 -0800 Subject: [PATCH 183/521] [SPARK-44495][INFRA][K8S][3.5] Use the latest minikube in K8s IT ### What changes were proposed in this pull request? This is a backport of #44813 . This PR aims to recover GitHub Action K8s IT to use the latest Minikube and to make it sure that Apache Spark K8s module are tested with all Minikubes without any issues. **BEFORE** - Minikube: v1.30.1 - K8s: v1.26.3 **AFTER** - Minikube: v1.32.0 - K8s: v1.28.3 ### Why are the changes needed? - Previously, it was pinned due to the failure. 
- After this PR, we will track the latest Minikube and K8s version always. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44819 from dongjoon-hyun/SPARK-44495-3.5. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 6 ++---- .../k8s/integrationtest/KubernetesTestComponents.scala | 2 ++ .../spark/deploy/k8s/integrationtest/PVTestsSuite.scala | 4 +++- .../deploy/k8s/integrationtest/VolcanoTestsSuite.scala | 4 ++-- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index b0b72a0d1a689..ad8685754b316 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -1016,9 +1016,7 @@ jobs: - name: start minikube run: | # See more in "Installation" https://minikube.sigs.k8s.io/docs/start/ - # curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 - # TODO(SPARK-44495): Resume to use the latest minikube for k8s-integration-tests. - curl -LO https://storage.googleapis.com/minikube/releases/v1.30.1/minikube-linux-amd64 + curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64 sudo install minikube-linux-amd64 /usr/local/bin/minikube # Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic minikube start --cpus 2 --memory 6144 @@ -1036,7 +1034,7 @@ jobs: kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.7.0/installer/volcano-development.yaml || true eval $(minikube docker-env) - build/sbt -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test" + build/sbt -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test" - name: Upload Spark on K8S integration tests log files if: failure() uses: actions/upload-artifact@v3 diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala index 4aba11bdb9d8f..4ebf44ce9a4bc 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesTestComponents.scala @@ -75,6 +75,8 @@ private[spark] class KubernetesTestComponents(val kubernetesClient: KubernetesCl .set(UI_ENABLED.key, "true") .set("spark.kubernetes.submission.waitAppCompletion", "false") .set("spark.kubernetes.authenticate.driver.serviceAccountName", serviceAccountName) + .set("spark.kubernetes.driver.request.cores", "0.2") + .set("spark.kubernetes.executor.request.cores", "0.2") } } diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala index f8e76012638cf..a699ef674cdcd 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/PVTestsSuite.scala @@ -24,6 +24,7 @@ import org.scalatest.concurrent.{Eventually, PatienceConfiguration} import org.scalatest.time.{Milliseconds, Span} import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite._ +import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.MinikubeTestBackend private[spark] trait PVTestsSuite { k8sSuite: KubernetesSuite => import PVTestsSuite._ @@ -54,6 +55,7 @@ private[spark] trait PVTestsSuite { k8sSuite: KubernetesSuite => setupLocalStorageClass() + val hostname = if (testBackend == MinikubeTestBackend) "minikube" else "docker-desktop" val pvBuilder = new PersistentVolumeBuilder() .withKind("PersistentVolume") .withApiVersion("v1") @@ -72,7 +74,7 @@ private[spark] trait PVTestsSuite { k8sSuite: KubernetesSuite => .withMatchExpressions(new NodeSelectorRequirementBuilder() .withKey("kubernetes.io/hostname") .withOperator("In") - .withValues("minikube", "m01", "docker-for-desktop", "docker-desktop") + .withValues(hostname) .build()).build()) .endRequired() .endNodeAffinity() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala index 06d6f7dc100f3..e7143e32db61e 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolcanoTestsSuite.scala @@ -496,8 +496,8 @@ private[spark] object VolcanoTestsSuite extends SparkFunSuite { val DRIVER_PG_TEMPLATE_MEMORY_3G = new File( getClass.getResource("/volcano/driver-podgroup-template-memory-3g.yml").getFile ).getAbsolutePath - val DRIVER_REQUEST_CORES = sys.props.get(CONFIG_DRIVER_REQUEST_CORES).getOrElse("1") - val EXECUTOR_REQUEST_CORES = sys.props.get(CONFIG_EXECUTOR_REQUEST_CORES).getOrElse("1") + val DRIVER_REQUEST_CORES = sys.props.get(CONFIG_DRIVER_REQUEST_CORES).getOrElse("0.2") + val EXECUTOR_REQUEST_CORES = sys.props.get(CONFIG_EXECUTOR_REQUEST_CORES).getOrElse("0.2") val VOLCANO_MAX_JOB_NUM = sys.props.get(CONFIG_KEY_VOLCANO_MAX_JOB_NUM).getOrElse("2") val TEMP_DIR = "/tmp/" } From 04d32493fde779021871c88709dbbae32f18e512 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 22 Jan 2024 16:19:39 +0800 Subject: [PATCH 184/521] [SPARK-46789][K8S][TESTS] Add `VolumeSuite` to K8s IT ### What changes were proposed in this pull request? This PR aims to add `VolumeSuite` to K8s IT. ### Why are the changes needed? To improve the test coverage on various K8s volume use cases. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44827 from dongjoon-hyun/SPARK-46789. 
Authored-by: Dongjoon Hyun Signed-off-by: Kent Yao --- .../k8s/integrationtest/KubernetesSuite.scala | 4 +- .../k8s/integrationtest/VolumeSuite.scala | 173 ++++++++++++++++++ 2 files changed, 175 insertions(+), 2 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolumeSuite.scala diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala index f52af87a745ca..54ef1f6cee30d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/KubernetesSuite.scala @@ -45,8 +45,8 @@ import org.apache.spark.internal.config._ class KubernetesSuite extends SparkFunSuite with BeforeAndAfterAll with BeforeAndAfter with BasicTestsSuite with SparkConfPropagateSuite with SecretsTestsSuite with PythonTestsSuite with ClientModeTestsSuite with PodTemplateSuite - with PVTestsSuite with DepsTestsSuite with DecommissionSuite with RTestsSuite with Logging - with Eventually with Matchers { + with VolumeSuite with PVTestsSuite with DepsTestsSuite with DecommissionSuite with RTestsSuite + with Logging with Eventually with Matchers { import KubernetesSuite._ diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolumeSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolumeSuite.scala new file mode 100644 index 0000000000000..c57e4b4578d6c --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/VolumeSuite.scala @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.k8s.integrationtest + +import scala.jdk.CollectionConverters._ + +import io.fabric8.kubernetes.api.model._ +import org.scalatest.concurrent.PatienceConfiguration +import org.scalatest.time.{Seconds, Span} + +import org.apache.spark.deploy.k8s.integrationtest.KubernetesSuite._ +import org.apache.spark.deploy.k8s.integrationtest.backend.minikube.MinikubeTestBackend + +private[spark] trait VolumeSuite { k8sSuite: KubernetesSuite => + val IGNORE = Some((Some(PatienceConfiguration.Interval(Span(0, Seconds))), None)) + + private def checkDisk(pod: Pod, path: String, expected: String) = { + eventually(PatienceConfiguration.Timeout(Span(10, Seconds)), INTERVAL) { + implicit val podName: String = pod.getMetadata.getName + implicit val components: KubernetesTestComponents = kubernetesTestComponents + assert(Utils.executeCommand("df", path).contains(expected)) + } + } + + test("A driver-only Spark job with a tmpfs-backed localDir volume", k8sTestTag) { + sparkAppConf + .set("spark.kubernetes.driver.master", "local[10]") + .set("spark.kubernetes.local.dirs.tmpfs", "true") + runSparkApplicationAndVerifyCompletion( + containerLocalSparkDistroExamplesJar, + SPARK_PI_MAIN_CLASS, + Seq("local[10]", "Pi is roughly 3"), + Seq(), + Array.empty[String], + driverPodChecker = (driverPod: Pod) => { + doBasicDriverPodCheck(driverPod) + val path = driverPod.getSpec.getContainers.get(0).getEnv.asScala + .filter(_.getName == "SPARK_LOCAL_DIRS").map(_.getValue).head + checkDisk(driverPod, path, "tmpfs") + }, + _ => (), + isJVM = true, + executorPatience = IGNORE) + } + + test("A driver-only Spark job with a tmpfs-backed emptyDir data volume", k8sTestTag) { + sparkAppConf + .set("spark.kubernetes.driver.master", "local[10]") + .set("spark.kubernetes.driver.volumes.emptyDir.data.mount.path", "/data") + .set("spark.kubernetes.driver.volumes.emptyDir.data.options.medium", "Memory") + .set("spark.kubernetes.driver.volumes.emptyDir.data.options.sizeLimit", "1G") + runSparkApplicationAndVerifyCompletion( + containerLocalSparkDistroExamplesJar, + SPARK_PI_MAIN_CLASS, + Seq("local[10]", "Pi is roughly 3"), + Seq(), + Array.empty[String], + driverPodChecker = (driverPod: Pod) => { + doBasicDriverPodCheck(driverPod) + checkDisk(driverPod, "/data", "tmpfs") + }, + _ => (), + isJVM = true, + executorPatience = IGNORE) + } + + test("A driver-only Spark job with a disk-backed emptyDir volume", k8sTestTag) { + sparkAppConf + .set("spark.kubernetes.driver.master", "local[10]") + .set("spark.kubernetes.driver.volumes.emptyDir.data.mount.path", "/data") + .set("spark.kubernetes.driver.volumes.emptyDir.data.mount.sizeLimit", "1G") + runSparkApplicationAndVerifyCompletion( + containerLocalSparkDistroExamplesJar, + SPARK_PI_MAIN_CLASS, + Seq("local[10]", "Pi is roughly 3"), + Seq(), + Array.empty[String], + driverPodChecker = (driverPod: Pod) => { + doBasicDriverPodCheck(driverPod) + checkDisk(driverPod, "/data", "/dev/") + }, + _ => (), + isJVM = true, + executorPatience = IGNORE) + } + + test("A driver-only Spark job with an OnDemand PVC volume", k8sTestTag) { + val storageClassName = if (testBackend == MinikubeTestBackend) "standard" else "hostpath" + val DRIVER_PREFIX = "spark.kubernetes.driver.volumes.persistentVolumeClaim" + sparkAppConf + .set("spark.kubernetes.driver.master", "local[10]") + .set(s"$DRIVER_PREFIX.data.options.claimName", "OnDemand") + .set(s"$DRIVER_PREFIX.data.options.storageClass", storageClassName) + .set(s"$DRIVER_PREFIX.data.options.sizeLimit", "1Gi") + 
.set(s"$DRIVER_PREFIX.data.mount.path", "/data") + .set(s"$DRIVER_PREFIX.data.mount.readOnly", "false") + runSparkApplicationAndVerifyCompletion( + containerLocalSparkDistroExamplesJar, + SPARK_PI_MAIN_CLASS, + Seq("local[10]", "Pi is roughly 3"), + Seq(), + Array.empty[String], + driverPodChecker = (driverPod: Pod) => { + doBasicDriverPodCheck(driverPod) + checkDisk(driverPod, "/data", "/dev/") + }, + _ => (), + isJVM = true, + executorPatience = IGNORE) + } + + test("A Spark job with tmpfs-backed localDir volumes", k8sTestTag) { + sparkAppConf + .set("spark.kubernetes.local.dirs.tmpfs", "true") + runSparkApplicationAndVerifyCompletion( + containerLocalSparkDistroExamplesJar, + SPARK_PI_MAIN_CLASS, + Seq("Pi is roughly 3"), + Seq(), + Array.empty[String], + driverPodChecker = (driverPod: Pod) => { + doBasicDriverPodCheck(driverPod) + val path = driverPod.getSpec.getContainers.get(0).getEnv.asScala + .filter(_.getName == "SPARK_LOCAL_DIRS").map(_.getValue).head + checkDisk(driverPod, path, "tmpfs") + }, + executorPodChecker = (executorPod: Pod) => { + doBasicExecutorPodCheck(executorPod) + val path = executorPod.getSpec.getContainers.get(0).getEnv.asScala + .filter(_.getName == "SPARK_LOCAL_DIRS").map(_.getValue).head + checkDisk(executorPod, path, "tmpfs") + }, + isJVM = true) + } + + test("A Spark job with two executors with OnDemand PVC volumes", k8sTestTag) { + val storageClassName = if (testBackend == MinikubeTestBackend) "standard" else "hostpath" + val EXECUTOR_PREFIX = "spark.kubernetes.executor.volumes.persistentVolumeClaim" + sparkAppConf + .set("spark.executor.instances", "2") + .set(s"$EXECUTOR_PREFIX.data.options.claimName", "OnDemand") + .set(s"$EXECUTOR_PREFIX.data.options.storageClass", storageClassName) + .set(s"$EXECUTOR_PREFIX.data.options.sizeLimit", "1Gi") + .set(s"$EXECUTOR_PREFIX.data.mount.path", "/data") + .set(s"$EXECUTOR_PREFIX.data.mount.readOnly", "false") + runSparkApplicationAndVerifyCompletion( + containerLocalSparkDistroExamplesJar, + SPARK_PI_MAIN_CLASS, + Seq("Pi is roughly 3"), + Seq(), + Array.empty[String], + _ => (), + executorPodChecker = (executorPod: Pod) => { + doBasicExecutorPodCheck(executorPod) + checkDisk(executorPod, "/data", "/dev/") + }, + isJVM = true) + } +} From 68d9f353300ed7de0b47c26cb30236bada896d25 Mon Sep 17 00:00:00 2001 From: Bruce Robbins Date: Mon, 22 Jan 2024 11:09:01 -0800 Subject: [PATCH 185/521] [SPARK-46779][SQL] `InMemoryRelation` instances of the same cached plan should be semantically equivalent When canonicalizing `output` in `InMemoryRelation`, use `output` itself as the schema for determining the ordinals, rather than `cachedPlan.output`. `InMemoryRelation.output` and `InMemoryRelation.cachedPlan.output` don't necessarily use the same exprIds. E.g.: ``` +- InMemoryRelation [c1#340, c2#341], StorageLevel(disk, memory, deserialized, 1 replicas) +- LocalTableScan [c1#254, c2#255] ``` Because of this, `InMemoryRelation` will sometimes fail to fully canonicalize, resulting in cases where two semantically equivalent `InMemoryRelation` instances appear to be semantically nonequivalent. 
Example: ``` create or replace temp view data(c1, c2) as values (1, 2), (1, 3), (3, 7), (4, 5); cache table data; select c1, (select count(*) from data d1 where d1.c1 = d2.c1), count(c2) from data d2 group by all; ``` If plan change validation checking is on (i.e., `spark.sql.planChangeValidation=true`), the failure is: ``` [PLAN_VALIDATION_FAILED_RULE_EXECUTOR] The input plan of org.apache.spark.sql.internal.BaseSessionStateBuilder$$anon$2 is invalid: Aggregate: Aggregate [c1#78, scalar-subquery#77 [c1#78]], [c1#78, scalar-subquery#77 [c1#78] AS scalarsubquery(c1)#90L, count(c2#79) AS count(c2)#83L] ... is not a valid aggregate expression: [SCALAR_SUBQUERY_IS_IN_GROUP_BY_OR_AGGREGATE_FUNCTION] The correlated scalar subquery '"scalarsubquery(c1)"' is neither present in GROUP BY, nor in an aggregate function. ``` If plan change validation checking is off, the failure is more mysterious: ``` [INTERNAL_ERROR] Couldn't find count(1)#163L in [c1#78,_groupingexpression#149L,count(1)#82L] SQLSTATE: XX000 org.apache.spark.SparkException: [INTERNAL_ERROR] Couldn't find count(1)#163L in [c1#78,_groupingexpression#149L,count(1)#82L] SQLSTATE: XX000 ``` If you remove the cache command, the query succeeds. The above failures happen because the subquery in the aggregate expressions and the subquery in the grouping expressions seem semantically nonequivalent since the `InMemoryRelation` in one of the subquery plans failed to completely canonicalize. In `CacheManager#useCachedData`, two lookups for the same cached plan may create `InMemoryRelation` instances that have different exprIds in `output`. That's because the plan fragments used as lookup keys may have been deduplicated by `DeduplicateRelations`, and thus have different exprIds in their respective output schemas. When `CacheManager#useCachedData` creates an `InMemoryRelation` instance, it borrows the output schema of the plan fragment used as the lookup key. The failure to fully canonicalize has other effects. For example, this query fails to reuse the exchange: ``` create or replace temp view data(c1, c2) as values (1, 2), (1, 3), (2, 4), (3, 7), (7, 22); cache table data; set spark.sql.autoBroadcastJoinThreshold=-1; set spark.sql.adaptive.enabled=false; select * from data l join data r on l.c1 = r.c1; ``` No. New tests. No. Closes #44806 from bersprockets/plan_validation_issue. 
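To make the exprId mismatch described above concrete, here is a toy sketch (a simplified `Attr` and `normalize`, not Spark's `QueryPlan.normalizeExpressions`): normalizing the relation's `output` against `cachedPlan.output` misses every id and leaves the attributes untouched, while normalizing against `output` itself resolves each attribute to a stable ordinal, which is what the one-line fix switches to.

```
// Toy sketch, assuming simplified Attr/normalize stand-ins rather than Spark's
// QueryPlan.normalizeExpressions: canonicalization maps an attribute to its ordinal in a
// reference schema, and an attribute whose id is absent from that schema is left as-is.
case class Attr(name: String, exprId: Long)

object CanonicalizeSketch extends App {
  def normalize(a: Attr, schema: Seq[Attr]): Attr = {
    val ordinal = schema.indexWhere(_.exprId == a.exprId)
    if (ordinal >= 0) Attr("none", ordinal.toLong) else a // unresolved: keeps its exprId
  }

  val output     = Seq(Attr("c1", 340L), Attr("c2", 341L)) // InMemoryRelation.output
  val cachedPlan = Seq(Attr("c1", 254L), Attr("c2", 255L)) // cachedPlan.output

  // Normalizing against cachedPlan.output misses every id, so nothing is canonicalized.
  println(output.map(normalize(_, cachedPlan))) // List(Attr(c1,340), Attr(c2,341))
  // Normalizing against output itself maps every attribute to a stable ordinal.
  println(output.map(normalize(_, output)))     // List(Attr(none,0), Attr(none,1))
}
```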
Authored-by: Bruce Robbins Signed-off-by: Dongjoon Hyun (cherry picked from commit b80e8cb4552268b771fc099457b9186807081c4a) Signed-off-by: Dongjoon Hyun --- .../sql/execution/columnar/InMemoryRelation.scala | 2 +- .../spark/sql/DataFrameAggregateSuite.scala | 15 +++++++++++++++ .../columnar/InMemoryRelationSuite.scala | 7 +++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index 65f7835b42cf8..5bab8e53eb163 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -405,7 +405,7 @@ case class InMemoryRelation( } override def doCanonicalize(): logical.LogicalPlan = - copy(output = output.map(QueryPlan.normalizeExpressions(_, cachedPlan.output)), + copy(output = output.map(QueryPlan.normalizeExpressions(_, output)), cacheBuilder, outputOrdering) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index d78771a8f19bc..631fcd8c0d87d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -2102,6 +2102,21 @@ class DataFrameAggregateSuite extends QueryTest Seq(Row(1)) ) } + + test("SPARK-46779: Group by subquery with a cached relation") { + withTempView("data") { + sql( + """create or replace temp view data(c1, c2) as values + |(1, 2), + |(1, 3), + |(3, 7)""".stripMargin) + sql("cache table data") + val df = sql( + """select c1, (select count(*) from data d1 where d1.c1 = d2.c1), count(c2) + |from data d2 group by all""".stripMargin) + checkAnswer(df, Row(1, 2, 2) :: Row(3, 1, 1) :: Nil) + } + } } case class B(c: Option[Double]) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryRelationSuite.scala index 72b3a4bc1095a..a5c5ec40af6fe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryRelationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/InMemoryRelationSuite.scala @@ -34,4 +34,11 @@ class InMemoryRelationSuite extends SparkFunSuite with SharedSparkSessionBase { assert(!relationCachedPlan.eq(clonedCachedPlan)) assert(relationCachedPlan === clonedCachedPlan) } + + test("SPARK-46779: InMemoryRelations with the same cached plan are semantically equivalent") { + val d = spark.range(1) + val r1 = InMemoryRelation(StorageLevel.MEMORY_ONLY, d.queryExecution, None) + val r2 = r1.withOutput(r1.output.map(_.newInstance())) + assert(r1.sameResult(r2)) + } } From a6869b25fb9a7ac0e7e5015d342435e5c1b5f044 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Mon, 22 Jan 2024 17:06:59 -0800 Subject: [PATCH 186/521] [SPARK-46801][PYTHON][TESTS] Do not treat exit code 5 as a test failure in Python testing script ### What changes were proposed in this pull request? This PR proposes to avoid treating the exit code 5 as a test failure in Python testing script. ### Why are the changes needed? ``` ... 
======================================================================== Running PySpark tests ======================================================================== Running PySpark tests. Output is in /__w/spark/spark/python/unit-tests.log Will test against the following Python executables: ['python3.12'] Will test the following Python modules: ['pyspark-core', 'pyspark-streaming', 'pyspark-errors'] python3.12 python_implementation is CPython python3.12 version is: Python 3.12.1 Starting test(python3.12): pyspark.streaming.tests.test_context (temp output: /__w/spark/spark/python/target/8674ed86-36bd-47d1-863b-abb0405557f6/python3.12__pyspark.streaming.tests.test_context__umu69c3v.log) Finished test(python3.12): pyspark.streaming.tests.test_context (12s) Starting test(python3.12): pyspark.streaming.tests.test_dstream (temp output: /__w/spark/spark/python/target/847eb56b-3c5f-49ab-8a83-3326bb96bc5d/python3.12__pyspark.streaming.tests.test_dstream__rorhk0lc.log) Finished test(python3.12): pyspark.streaming.tests.test_dstream (102s) Starting test(python3.12): pyspark.streaming.tests.test_kinesis (temp output: /__w/spark/spark/python/target/78f23c83-c24d-4fa1-abbd-edb90f48dff1/python3.12__pyspark.streaming.tests.test_kinesis__q5l1pv0h.log) test_kinesis_stream (pyspark.streaming.tests.test_kinesis.KinesisStreamTests.test_kinesis_stream) ... skipped "Skipping all Kinesis Python tests as environmental variable 'ENABLE_KINESIS_TESTS' was not set." test_kinesis_stream_api (pyspark.streaming.tests.test_kinesis.KinesisStreamTests.test_kinesis_stream_api) ... skipped "Skipping all Kinesis Python tests as environmental variable 'ENABLE_KINESIS_TESTS' was not set." ---------------------------------------------------------------------- Ran 0 tests in 0.000s NO TESTS RAN (skipped=2) Had test failures in pyspark.streaming.tests.test_kinesis with python3.12; see logs. Error: running /__w/spark/spark/python/run-tests --modules=pyspark-core,pyspark-streaming,pyspark-errors --parallelism=1 --python-executables=python3.12 ; received return code 255 Error: Process completed with exit code 19. ``` Scheduled job fails because of exit 5, see https://github.com/pytest-dev/pytest/issues/2393. This isn't a test failure. ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? Manually tested. ### Was this patch authored or co-authored using generative AI tooling? No, Closes #44841 from HyukjinKwon/SPARK-46801. Authored-by: Hyukjin Kwon Signed-off-by: Dongjoon Hyun (cherry picked from commit 52b62921cadb05da5b1183f979edf7d608256f2e) Signed-off-by: Dongjoon Hyun --- python/run-tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/run-tests.py b/python/run-tests.py index 19e39c822cbb4..b9031765d9437 100755 --- a/python/run-tests.py +++ b/python/run-tests.py @@ -147,8 +147,8 @@ def run_individual_python_test(target_dir, test_name, pyspark_python, keep_test_ # this code is invoked from a thread other than the main thread. os._exit(1) duration = time.time() - start_time - # Exit on the first failure. - if retcode != 0: + # Exit on the first failure but exclude the code 5 for no test ran, see SPARK-46801. 
+ if retcode != 0 and retcode != 5: try: with FAILURE_REPORTING_LOCK: with open(LOG_FILE, 'ab') as log_file: From 6403a84b6854214a4ed7d5c0c800e877e0748964 Mon Sep 17 00:00:00 2001 From: jackylee-ch Date: Tue, 23 Jan 2024 16:10:37 +0800 Subject: [PATCH 187/521] [SPARK-46590][SQL] Fix coalesce failed with unexpected partition indeces ### What changes were proposed in this pull request? As outlined in JIRA issue [SPARK-46590](https://issues.apache.org/jira/browse/SPARK-46590), when a broadcast join follows a union within the same stage, the [collectCoalesceGroups](https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala#L144) method will indiscriminately traverse all sub-plans, aggregating them into a single group, which is not expected. ### Why are the changes needed? In fact, for broadcastjoin, we do not expect broadcast exchange has same partition number. Therefore, we can safely disregard the broadcast join and continue traversing the subplan. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Newly added unit test. It would fail without this pr. ### Was this patch authored or co-authored using generative AI tooling? No Closes #44661 from jackylee-ch/fix_coalesce_problem_with_broadcastjoin_and_union. Authored-by: jackylee-ch Signed-off-by: Wenchen Fan (cherry picked from commit de0c4ad3947f1188f02aaa612df8278d1c7c3ce5) Signed-off-by: Wenchen Fan --- .../adaptive/CoalesceShufflePartitions.scala | 10 ++- .../adaptive/ShufflePartitionsUtil.scala | 6 +- .../CoalesceShufflePartitionsSuite.scala | 61 +++++++++++++++++++ .../ShufflePartitionsUtilSuite.scala | 31 +++++----- 4 files changed, 86 insertions(+), 22 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 34399001c726f..26e5ac649dbb1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -23,6 +23,7 @@ import org.apache.spark.sql.SparkSession import org.apache.spark.sql.catalyst.plans.physical.SinglePartition import org.apache.spark.sql.execution.{ShufflePartitionSpec, SparkPlan, UnaryExecNode, UnionExec} import org.apache.spark.sql.execution.exchange.{ENSURE_REQUIREMENTS, REBALANCE_PARTITIONS_BY_COL, REBALANCE_PARTITIONS_BY_NONE, REPARTITION_BY_COL, ShuffleExchangeLike, ShuffleOrigin} +import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, BroadcastNestedLoopJoinExec, CartesianProductExec} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.Utils @@ -146,13 +147,16 @@ case class CoalesceShufflePartitions(session: SparkSession) extends AQEShuffleRe Seq(collectShuffleStageInfos(r)) case unary: UnaryExecNode => collectCoalesceGroups(unary.child) case union: UnionExec => union.children.flatMap(collectCoalesceGroups) - // If not all leaf nodes are exchange query stages, it's not safe to reduce the number of - // shuffle partitions, because we may break the assumption that all children of a spark plan - // have same number of output partitions. 
+ case join: CartesianProductExec => join.children.flatMap(collectCoalesceGroups) // Note that, `BroadcastQueryStageExec` is a valid case: // If a join has been optimized from shuffled join to broadcast join, then the one side is // `BroadcastQueryStageExec` and other side is `ShuffleQueryStageExec`. It can coalesce the // shuffle side as we do not expect broadcast exchange has same partition number. + case join: BroadcastHashJoinExec => join.children.flatMap(collectCoalesceGroups) + case join: BroadcastNestedLoopJoinExec => join.children.flatMap(collectCoalesceGroups) + // If not all leaf nodes are exchange query stages, it's not safe to reduce the number of + // shuffle partitions, because we may break the assumption that all children of a spark plan + // have same number of output partitions. case p if p.collectLeaves().forall(_.isInstanceOf[ExchangeQueryStageExec]) => val shuffleStages = collectShuffleStageInfos(p) // ShuffleExchanges introduced by repartition do not support partition number change. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala index dbed66683b017..9370b3d8d1d74 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala @@ -128,8 +128,10 @@ object ShufflePartitionsUtil extends Logging { // There should be no unexpected partition specs and the start indices should be identical // across all different shuffles. - assert(partitionIndicesSeq.distinct.length == 1 && partitionIndicesSeq.head.forall(_ >= 0), - s"Invalid shuffle partition specs: $inputPartitionSpecs") + if (partitionIndicesSeq.distinct.length > 1 || partitionIndicesSeq.head.exists(_ < 0)) { + logWarning(s"Could not apply partition coalescing because of unexpected partition indices.") + return Seq.empty + } // The indices may look like [0, 1, 2, 2, 2, 3, 4, 4, 5], and the repeated `2` and `4` mean // skewed partitions. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala index 24a98dd83f33a..e11191da6a952 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/CoalesceShufflePartitionsSuite.scala @@ -310,6 +310,67 @@ class CoalesceShufflePartitionsSuite extends SparkFunSuite { } } + test("SPARK-46590 adaptive query execution works correctly with broadcast join and union") { + val test: SparkSession => Unit = { spark: SparkSession => + import spark.implicits._ + spark.conf.set(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "1KB") + spark.conf.set(SQLConf.SKEW_JOIN_SKEWED_PARTITION_THRESHOLD.key, "10KB") + spark.conf.set(SQLConf.SKEW_JOIN_SKEWED_PARTITION_FACTOR, 2.0) + val df00 = spark.range(0, 1000, 2) + .selectExpr("id as key", "id as value") + .union(Seq.fill(100000)((600, 600)).toDF("key", "value")) + val df01 = spark.range(0, 1000, 3) + .selectExpr("id as key", "id as value") + val df10 = spark.range(0, 1000, 5) + .selectExpr("id as key", "id as value") + .union(Seq.fill(500000)((600, 600)).toDF("key", "value")) + val df11 = spark.range(0, 1000, 7) + .selectExpr("id as key", "id as value") + val df20 = spark.range(0, 10).selectExpr("id as key", "id as value") + + df20.join(df00.join(df01, Array("key", "value"), "left_outer") + .union(df10.join(df11, Array("key", "value"), "left_outer"))) + .write + .format("noop") + .mode("overwrite") + .save() + } + withSparkSession(test, 12000, None) + } + + test("SPARK-46590 adaptive query execution works correctly with cartesian join and union") { + val test: SparkSession => Unit = { spark: SparkSession => + import spark.implicits._ + spark.conf.set(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, "-1") + spark.conf.set(SQLConf.SKEW_JOIN_SKEWED_PARTITION_THRESHOLD.key, "100B") + spark.conf.set(SQLConf.SKEW_JOIN_SKEWED_PARTITION_FACTOR, 2.0) + val df00 = spark.range(0, 10, 2) + .selectExpr("id as key", "id as value") + .union(Seq.fill(1000)((600, 600)).toDF("key", "value")) + val df01 = spark.range(0, 10, 3) + .selectExpr("id as key", "id as value") + val df10 = spark.range(0, 10, 5) + .selectExpr("id as key", "id as value") + .union(Seq.fill(5000)((600, 600)).toDF("key", "value")) + val df11 = spark.range(0, 10, 7) + .selectExpr("id as key", "id as value") + val df20 = spark.range(0, 10) + .selectExpr("id as key", "id as value") + .union(Seq.fill(1000)((11, 11)).toDF("key", "value")) + val df21 = spark.range(0, 10) + .selectExpr("id as key", "id as value") + + df20.join(df21.hint("shuffle_hash"), Array("key", "value"), "left_outer") + .join(df00.join(df01.hint("shuffle_hash"), Array("key", "value"), "left_outer") + .union(df10.join(df11.hint("shuffle_hash"), Array("key", "value"), "left_outer"))) + .write + .format("noop") + .mode("overwrite") + .save() + } + withSparkSession(test, 100, None) + } + test("SPARK-24705 adaptive query execution works correctly when exchange reuse enabled") { val test: SparkSession => Unit = { spark: SparkSession => spark.sql("SET spark.sql.exchange.reuse=true") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala index da05373125d31..f8b7964368476 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/ShufflePartitionsUtilSuite.scala @@ -567,14 +567,13 @@ class ShufflePartitionsUtilSuite extends SparkFunSuite with LocalSparkContext { } { - // Assertion error if shuffle partition specs contain `CoalescedShuffleSpec` that has - // `end` - `start` > 1. + // If shuffle partition specs contain `CoalescedShuffleSpec` that has + // `end` - `start` > 1, return empty result. val bytesByPartitionId1 = Array[Long](10, 10, 10, 10, 10) val bytesByPartitionId2 = Array[Long](10, 10, 10, 10, 10) val specs1 = Seq(CoalescedPartitionSpec(0, 1), CoalescedPartitionSpec(1, 5)) val specs2 = specs1 - intercept[AssertionError] { - ShufflePartitionsUtil.coalescePartitions( + val coalesced = ShufflePartitionsUtil.coalescePartitions( Array( Some(new MapOutputStatistics(0, bytesByPartitionId1)), Some(new MapOutputStatistics(1, bytesByPartitionId2))), @@ -582,17 +581,16 @@ class ShufflePartitionsUtilSuite extends SparkFunSuite with LocalSparkContext { Some(specs1), Some(specs2)), targetSize, 1, 0) - } + assert(coalesced.isEmpty) } { - // Assertion error if shuffle partition specs contain `PartialMapperShuffleSpec`. + // If shuffle partition specs contain `PartialMapperShuffleSpec`, return empty result. val bytesByPartitionId1 = Array[Long](10, 10, 10, 10, 10) val bytesByPartitionId2 = Array[Long](10, 10, 10, 10, 10) val specs1 = Seq(CoalescedPartitionSpec(0, 1), PartialMapperPartitionSpec(1, 0, 1)) val specs2 = specs1 - intercept[AssertionError] { - ShufflePartitionsUtil.coalescePartitions( + val coalesced = ShufflePartitionsUtil.coalescePartitions( Array( Some(new MapOutputStatistics(0, bytesByPartitionId1)), Some(new MapOutputStatistics(1, bytesByPartitionId2))), @@ -600,18 +598,17 @@ class ShufflePartitionsUtilSuite extends SparkFunSuite with LocalSparkContext { Some(specs1), Some(specs2)), targetSize, 1, 0) - } + assert(coalesced.isEmpty) } { - // Assertion error if partition specs of different shuffles have different lengths. + // If partition specs of different shuffles have different lengths, return empty result. val bytesByPartitionId1 = Array[Long](10, 10, 10, 10, 10) val bytesByPartitionId2 = Array[Long](10, 10, 10, 10, 10) val specs1 = Seq.tabulate(4)(i => CoalescedPartitionSpec(i, i + 1)) ++ Seq.tabulate(2)(i => PartialReducerPartitionSpec(4, i, i + 1, 10L)) val specs2 = Seq.tabulate(5)(i => CoalescedPartitionSpec(i, i + 1)) - intercept[AssertionError] { - ShufflePartitionsUtil.coalescePartitions( + val coalesced = ShufflePartitionsUtil.coalescePartitions( Array( Some(new MapOutputStatistics(0, bytesByPartitionId1)), Some(new MapOutputStatistics(1, bytesByPartitionId2))), @@ -619,11 +616,12 @@ class ShufflePartitionsUtilSuite extends SparkFunSuite with LocalSparkContext { Some(specs1), Some(specs2)), targetSize, 1, 0) - } + assert(coalesced.isEmpty) } { - // Assertion error if start indices of partition specs are not identical among all shuffles. + // If start indices of partition specs are not identical among all shuffles, + // return empty result. 
val bytesByPartitionId1 = Array[Long](10, 10, 10, 10, 10) val bytesByPartitionId2 = Array[Long](10, 10, 10, 10, 10) val specs1 = Seq.tabulate(4)(i => CoalescedPartitionSpec(i, i + 1)) ++ @@ -631,8 +629,7 @@ class ShufflePartitionsUtilSuite extends SparkFunSuite with LocalSparkContext { val specs2 = Seq.tabulate(2)(i => CoalescedPartitionSpec(i, i + 1)) ++ Seq.tabulate(2)(i => PartialReducerPartitionSpec(2, i, i + 1, 10L)) ++ Seq.tabulate(2)(i => CoalescedPartitionSpec(i + 3, i + 4)) - intercept[AssertionError] { - ShufflePartitionsUtil.coalescePartitions( + val coalesced = ShufflePartitionsUtil.coalescePartitions( Array( Some(new MapOutputStatistics(0, bytesByPartitionId1)), Some(new MapOutputStatistics(1, bytesByPartitionId2))), @@ -640,7 +637,7 @@ class ShufflePartitionsUtilSuite extends SparkFunSuite with LocalSparkContext { Some(specs1), Some(specs2)), targetSize, 1, 0) - } + assert(coalesced.isEmpty) } { From a559ff7bb9d3c34429f80760741f1bbd40696f32 Mon Sep 17 00:00:00 2001 From: Nikhil Sheoran <125331115+nikhilsheoran-db@users.noreply.github.com> Date: Tue, 23 Jan 2024 17:15:30 +0800 Subject: [PATCH 188/521] [SPARK-46763] Fix assertion failure in ReplaceDeduplicateWithAggregate for duplicate attributes ### What changes were proposed in this pull request? - Updated the `ReplaceDeduplicateWithAggregate` implementation to reuse aliases generated for an attribute. - Added a unit test to ensure scenarios with duplicate non-grouping keys are correctly optimized. ### Why are the changes needed? - `ReplaceDeduplicateWithAggregate` replaces `Deduplicate` with an `Aggregate` operator with grouping expressions for the deduplication keys and aggregate expressions for the non-grouping keys (to preserve the output schema and keep the non-grouping columns). - For non-grouping key `a#X`, it generates an aggregate expression of the form `first(a#X, false) AS a#Y` - In case the non-grouping keys have a repeated attribute (with the same name and exprId), the existing logic would generate two different aggregate expressions both having two different exprId. - This then leads to duplicate rewrite attributes error (in `transformUpWithNewOutput`) when transforming the remaining tree. - For example, for the query ``` Project [a#0, b#1] +- Deduplicate [b#1] +- Project [a#0, a#0, b#1] +- LocalRelation , [a#0, b#1] ``` the existing logic would transform it to ``` Project [a#3, b#1] +- Aggregate [b#1], [first(a#0, false) AS a#3, first(a#0, false) AS a#5, b#1] +- Project [a#0, a#0, b#1] +- LocalRelation , [a#0, b#1] ``` with the aggregate mapping having two entries `a#0 -> a#3, a#0 -> a#5`. The correct transformation would be ``` Project [a#3, b#1] +- Aggregate [b#1], [first(a#0, false) AS a#3, first(a#0, false) AS a#3, b#1] +- Project [a#0, a#0, b#1] +- LocalRelation , [a#0, b#1] ``` with the aggregate mapping having only one entry `a#0 -> a#3`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a unit test in `ResolveOperatorSuite`. ### Was this patch authored or co-authored using generative AI tooling? No Closes #44835 from nikhilsheoran-db/SPARK-46763. 
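A toy sketch of the alias-reuse idea described above (simplified `Attr`/`Alias` stand-ins, not Spark's expression classes): generating a fresh alias per occurrence rewrites the duplicated `a#0` to two different new ids, while caching one alias per source attribute, as the `getOrElseUpdate` change does, keeps both occurrences on the same id.

```
// Toy sketch with simplified Attr/Alias stand-ins, not Spark's expression classes:
// reuse one generated alias per distinct source attribute so a duplicated non-grouping
// column is rewritten to a single new exprId instead of two conflicting ones.
import scala.collection.mutable

case class Attr(name: String, exprId: Long)
case class Alias(child: Attr, exprId: Long)

object AliasReuseSketch extends App {
  private var nextId = 100L
  def freshAlias(a: Attr): Alias = { nextId += 1; Alias(a, nextId) }

  val childOutput = Seq(Attr("a", 0L), Attr("a", 0L), Attr("b", 1L)) // a#0 appears twice

  // Before the fix: a new alias per occurrence, so a#0 is rewritten to two different ids.
  val perOccurrence = childOutput.map(freshAlias)

  // After the fix: one cached alias per attribute (getOrElseUpdate), so both a#0
  // occurrences share the same generated exprId.
  val generated = mutable.HashMap.empty[Attr, Alias]
  val reused = childOutput.map(a => generated.getOrElseUpdate(a, freshAlias(a)))

  println(perOccurrence.map(_.exprId)) // List(101, 102, 103)
  println(reused.map(_.exprId))        // List(104, 104, 105)
}
```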
Authored-by: Nikhil Sheoran <125331115+nikhilsheoran-db@users.noreply.github.com> Signed-off-by: Wenchen Fan (cherry picked from commit 715b43428913d6a631f8f9043baac751b88cb5d4) Signed-off-by: Wenchen Fan --- .../sql/catalyst/optimizer/Optimizer.scala | 6 +++- .../optimizer/ReplaceOperatorSuite.scala | 31 +++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index df17840d567e0..04d3eb962ed44 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -2195,11 +2195,15 @@ object ReplaceDeduplicateWithAggregate extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformUpWithNewOutput { case d @ Deduplicate(keys, child) if !child.isStreaming => val keyExprIds = keys.map(_.exprId) + val generatedAliasesMap = new mutable.HashMap[Attribute, Alias](); val aggCols = child.output.map { attr => if (keyExprIds.contains(attr.exprId)) { attr } else { - Alias(new First(attr).toAggregateExpression(), attr.name)() + // Keep track of the generated aliases to avoid generating multiple aliases + // for the same attribute (in case the attribute is duplicated) + generatedAliasesMap.getOrElseUpdate(attr, + Alias(new First(attr).toAggregateExpression(), attr.name)()) } } // SPARK-22951: Physical aggregate operators distinguishes global aggregation and grouping diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala index 5d81e96a8e583..cb9577e050d04 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ReplaceOperatorSuite.scala @@ -265,4 +265,35 @@ class ReplaceOperatorSuite extends PlanTest { Join(basePlan, otherPlan, LeftAnti, Option(condition), JoinHint.NONE)).analyze comparePlans(result, correctAnswer) } + + test("SPARK-46763: ReplaceDeduplicateWithAggregate non-grouping keys with duplicate attributes") { + val a = $"a".int + val b = $"b".int + val first_a = Alias(new First(a).toAggregateExpression(), a.name)() + + val query = Project( + projectList = Seq(a, b), + Deduplicate( + keys = Seq(b), + child = Project( + projectList = Seq(a, a, b), + child = LocalRelation(Seq(a, b)) + ) + ) + ).analyze + + val result = Optimize.execute(query) + val correctAnswer = Project( + projectList = Seq(first_a.toAttribute, b), + Aggregate( + Seq(b), + Seq(first_a, first_a, b), + Project( + projectList = Seq(a, a, b), + child = LocalRelation(Seq(a, b)) + ) + ) + ).analyze + comparePlans(result, correctAnswer) + } } From 20da7c0f3c44b1b435171058da36d8cd59391ba8 Mon Sep 17 00:00:00 2001 From: Kent Yao Date: Tue, 23 Jan 2024 17:35:59 +0800 Subject: [PATCH 189/521] Revert "[SPARK-46417][SQL] Do not fail when calling hive.getTable and throwException is false" This reverts commit 8abf9583ac2303765255299af3e843d8248f313f. 
--- .../scala/org/apache/spark/sql/hive/client/HiveShim.scala | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala index 7025e09ae9d9e..60ff9ec42f29d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveShim.scala @@ -620,13 +620,7 @@ private[client] class Shim_v0_12 extends Shim with Logging { tableName: String, throwException: Boolean): Table = { recordHiveCall() - val table = try { - hive.getTable(dbName, tableName, throwException) - } catch { - // Hive may have bugs and still throw an exception even if `throwException` is false. - case e: HiveException if !throwException => - null - } + val table = hive.getTable(dbName, tableName, throwException) if (table != null) { table.getTTable.setTableName(tableName) table.getTTable.setDbName(dbName) From 05f7aa596c7b1c05704abfad94b1b1d3085c530e Mon Sep 17 00:00:00 2001 From: Tom van Bussel Date: Tue, 23 Jan 2024 08:45:32 -0800 Subject: [PATCH 190/521] [SPARK-46794][SQL] Remove subqueries from LogicalRDD constraints This PR modifies `LogicalRDD` to filter out all subqueries from its `constraints`. Fixes a correctness bug. Spark can produce incorrect results when using a checkpointed `DataFrame` with a filter containing a scalar subquery. This subquery is included in the constraints of the resulting `LogicalRDD`, and may then be propagated as a filter when joining with the checkpointed `DataFrame`. This causes the subquery to be evaluated twice: once during checkpointing and once while evaluating the query. These two subquery evaluations may return different results, e.g. when the subquery contains a limit with an underspecified sort order. No Added a test to `DataFrameSuite`. No Closes #44833 from tomvanbussel/SPARK-46794. Authored-by: Tom van Bussel Signed-off-by: Dongjoon Hyun (cherry picked from commit d26e871136e0c6e1f84a25978319733a516b7b2e) Signed-off-by: Dongjoon Hyun --- .../apache/spark/sql/execution/ExistingRDD.scala | 7 +++++++ .../org/apache/spark/sql/DataFrameSuite.scala | 16 +++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala index 3dcf0efaadd8f..3b49abcb1a866 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala @@ -150,6 +150,13 @@ case class LogicalRDD( } override lazy val constraints: ExpressionSet = originConstraints.getOrElse(ExpressionSet()) + // Subqueries can have non-deterministic results even when they only contain deterministic + // expressions (e.g. consider a LIMIT 1 subquery without an ORDER BY). Propagating predicates + // containing a subquery causes the subquery to be executed twice (as the result of the subquery + // in the checkpoint computation cannot be reused), which could result in incorrect results. + // Therefore we assume that all subqueries are non-deterministic, and we do not expose any + // constraints that contain a subquery. 
+ .filterNot(SubqueryExpression.hasSubquery) } object LogicalRDD extends Logging { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 2eba9f1810982..002719f06896d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -35,7 +35,7 @@ import org.apache.spark.scheduler.{SparkListener, SparkListenerJobEnd} import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier} import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, EqualTo, ExpressionSet, GreaterThan, Literal, PythonUDF, Uuid} +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap, AttributeReference, EqualTo, ExpressionSet, GreaterThan, Literal, PythonUDF, ScalarSubquery, Uuid} import org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.{ColumnStat, LeafNode, LocalRelation, LogicalPlan, OneRowRelation, Statistics} @@ -2258,6 +2258,20 @@ class DataFrameSuite extends QueryTest assert(newConstraints === newExpectedConstraints) } + test("SPARK-46794: exclude subqueries from LogicalRDD constraints") { + withTempDir { checkpointDir => + val subquery = + new Column(ScalarSubquery(spark.range(10).selectExpr("max(id)").logicalPlan)) + val df = spark.range(1000).filter($"id" === subquery) + assert(df.logicalPlan.constraints.exists(_.exists(_.isInstanceOf[ScalarSubquery]))) + + spark.sparkContext.setCheckpointDir(checkpointDir.getAbsolutePath) + val checkpointedDf = df.checkpoint() + assert(!checkpointedDf.logicalPlan.constraints + .exists(_.exists(_.isInstanceOf[ScalarSubquery]))) + } + } + test("SPARK-10656: completely support special chars") { val df = Seq(1 -> "a").toDF("i_$.a", "d^'a.") checkAnswer(df.select(df("*")), Row(1, "a")) From be7f1e9979c38b1358b0af2b358bacb0bd523c80 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 23 Jan 2024 16:38:45 -0800 Subject: [PATCH 191/521] [SPARK-46817][CORE] Fix `spark-daemon.sh` usage by adding `decommission` command ### What changes were proposed in this pull request? This PR aims to fix `spark-daemon.sh` usage by adding `decommission` command. ### Why are the changes needed? This was missed when SPARK-20628 added `decommission` command at Apache Spark 3.1.0. The command has been used like the following. https://github.com/apache/spark/blob/0356ac00947282b1a0885ad7eaae1e25e43671fe/sbin/decommission-worker.sh#L41 ### Does this PR introduce _any_ user-facing change? No, this is only a change on usage message. ### How was this patch tested? Manual review. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44856 from dongjoon-hyun/SPARK-46817. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 00a92d328576c39b04cfd0fdd8a30c5a9bc37e36) Signed-off-by: Dongjoon Hyun --- sbin/spark-daemon.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sbin/spark-daemon.sh b/sbin/spark-daemon.sh index 3cfd5acfe2b56..28d205f03e0fa 100755 --- a/sbin/spark-daemon.sh +++ b/sbin/spark-daemon.sh @@ -31,7 +31,7 @@ # SPARK_NO_DAEMONIZE If set, will run the proposed command in the foreground. It will not output a PID file. 
## -usage="Usage: spark-daemon.sh [--config ] (start|stop|submit|status) " +usage="Usage: spark-daemon.sh [--config ] (start|stop|submit|decommission|status) " # if no args specified, show usage if [ $# -le 1 ]; then From 0956db6901bf03d2d948b23f00bcd6e74a0c251b Mon Sep 17 00:00:00 2001 From: zml1206 Date: Wed, 24 Jan 2024 15:06:55 +0800 Subject: [PATCH 192/521] [SPARK-46590][SQL][FOLLOWUP] Update CoalesceShufflePartitions comments ### What changes were proposed in this pull request? After #44661 ,In addition to Union, children of CartesianProduct, BroadcastHashJoin and BroadcastNestedLoopJoin can also be coalesced independently, update comments. ### Why are the changes needed? Improve the readability and maintainability. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44854 from zml1206/SPARK-46590-FOLLOWUP. Authored-by: zml1206 Signed-off-by: Wenchen Fan (cherry picked from commit fe4f8eac3efee42d53f7f24763a59c82ef03d343) Signed-off-by: Wenchen Fan --- .../adaptive/CoalesceShufflePartitions.scala | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala index 26e5ac649dbb1..db4a6b7dcf2eb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/CoalesceShufflePartitions.scala @@ -66,9 +66,9 @@ case class CoalesceShufflePartitions(session: SparkSession) extends AQEShuffleRe } } - // Sub-plans under the Union operator can be coalesced independently, so we can divide them - // into independent "coalesce groups", and all shuffle stages within each group have to be - // coalesced together. + // Sub-plans under the Union/CartesianProduct/BroadcastHashJoin/BroadcastNestedLoopJoin + // operator can be coalesced independently, so we can divide them into independent + // "coalesce groups", and all shuffle stages within each group have to be coalesced together. val coalesceGroups = collectCoalesceGroups(plan) // Divide minimum task parallelism among coalesce groups according to their data sizes. @@ -137,8 +137,9 @@ case class CoalesceShufflePartitions(session: SparkSession) extends AQEShuffleRe } /** - * Gather all coalesce-able groups such that the shuffle stages in each child of a Union operator - * are in their independent groups if: + * Gather all coalesce-able groups such that the shuffle stages in each child of a + * Union/CartesianProduct/BroadcastHashJoin/BroadcastNestedLoopJoin operator are in their + * independent groups if: * 1) all leaf nodes of this child are exchange stages; and * 2) all these shuffle stages support coalescing. */ From ef33b9c50806475f287267c05278aeda3645abac Mon Sep 17 00:00:00 2001 From: Bhuwan Sahni Date: Wed, 24 Jan 2024 21:35:33 +0900 Subject: [PATCH 193/521] [SPARK-46796][SS] Ensure the correct remote files (mentioned in metadata.zip) are used on RocksDB version load This PR ensures that RocksDB loads do not run into SST file Version ID mismatch issue. RocksDB has added validation to ensure exact same SST file is used during database load from snapshot. Current streaming state suffers from certain edge cases where this condition is violated resulting in state load failure. The changes introduced are: 1. 
Ensure that the local SST file is exactly the same DFS file (as per mapping in metadata.zip). We keep track of the DFS file path for a local SST file, and re download the SST file in case DFS file has a different UUID in metadata zip. 2. Reset lastSnapshotVersion in RocksDB when Rocks DB is loaded. Changelog checkpoint relies on this version for future snapshots. Currently, if a older version is reloaded we were not uploading snapshots as lastSnapshotVersion was pointing to a higher snapshot of a cleanup database. We need to ensure that the correct SST files are used on executor during RocksDB load as per mapping in metadata.zip. With current implementation, its possible that the executor uses a SST file (with a different UUID) from a older version which is not the exact file mapped in the metadata.zip. This can cause version Id mismatch errors while loading RocksDB leading to streaming query failures. See https://issues.apache.org/jira/browse/SPARK-46796 for failure scenarios. No Added exhaustive unit testcases covering the scenarios. No Closes #44837 from sahnib/SPARK-46796. Authored-by: Bhuwan Sahni Signed-off-by: Jungtaek Lim (cherry picked from commit f25ebe52b9b84ece9b3c5ae30b83eaaef52ec55b) Signed-off-by: Jungtaek Lim --- .../execution/streaming/state/RocksDB.scala | 3 + .../streaming/state/RocksDBFileManager.scala | 92 +++-- .../streaming/state/RocksDBSuite.scala | 314 +++++++++++++++++- 3 files changed, 372 insertions(+), 37 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala index 2398b7780726a..0c9738a6b0817 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDB.scala @@ -151,6 +151,8 @@ class RocksDB( val metadata = fileManager.loadCheckpointFromDfs(latestSnapshotVersion, workingDir) loadedVersion = latestSnapshotVersion + // reset last snapshot version + lastSnapshotVersion = 0L openDB() numKeysOnWritingVersion = if (!conf.trackTotalNumberOfRows) { @@ -191,6 +193,7 @@ class RocksDB( */ private def replayChangelog(endVersion: Long): Unit = { for (v <- loadedVersion + 1 to endVersion) { + logInfo(s"replaying changelog from version $loadedVersion -> $endVersion") var changelogReader: StateStoreChangelogReader = null try { changelogReader = fileManager.getChangelogReader(v) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala index faf9cd701aeca..300a3b8137b4c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/state/RocksDBFileManager.scala @@ -132,6 +132,15 @@ class RocksDBFileManager( import RocksDBImmutableFile._ private val versionToRocksDBFiles = new ConcurrentHashMap[Long, Seq[RocksDBImmutableFile]] + + + // used to keep a mapping of the exact Dfs file that was used to create a local SST file. + // The reason this is a separate map because versionToRocksDBFiles can contain multiple similar + // SST files to a particular local file (for example 1.sst can map to 1-UUID1.sst in v1 and + // 1-UUID2.sst in v2). We need to capture the exact file used to ensure Version ID compatibility + // across SST files and RocksDB manifest. 
+ private[sql] val localFilesToDfsFiles = new ConcurrentHashMap[String, RocksDBImmutableFile] + private lazy val fm = CheckpointFileManager.create(new Path(dfsRootDir), hadoopConf) private val fs = new Path(dfsRootDir).getFileSystem(hadoopConf) private val onlyZipFiles = new PathFilter { @@ -213,6 +222,7 @@ class RocksDBFileManager( versionToRocksDBFiles.keySet().removeIf(_ >= version) val metadata = if (version == 0) { if (localDir.exists) Utils.deleteRecursively(localDir) + localFilesToDfsFiles.clear() localDir.mkdirs() RocksDBCheckpointMetadata(Seq.empty, 0) } else { @@ -449,44 +459,54 @@ class RocksDBFileManager( // Get the immutable files used in previous versions, as some of those uploaded files can be // reused for this version logInfo(s"Saving RocksDB files to DFS for $version") - val prevFilesToSizes = versionToRocksDBFiles.asScala.filterKeys(_ < version) - .values.flatten.map { f => - f.localFileName -> f - }.toMap var bytesCopied = 0L var filesCopied = 0L var filesReused = 0L val immutableFiles = localFiles.map { localFile => - prevFilesToSizes - .get(localFile.getName) - .filter(_.isSameFile(localFile)) - .map { reusable => - filesReused += 1 - reusable - }.getOrElse { - val localFileName = localFile.getName - val dfsFileName = newDFSFileName(localFileName) - val dfsFile = dfsFilePath(dfsFileName) - // Note: The implementation of copyFromLocalFile() closes the output stream when there is - // any exception while copying. So this may generate partial files on DFS. But that is - // okay because until the main [version].zip file is written, those partial files are - // not going to be used at all. Eventually these files should get cleared. - fs.copyFromLocalFile( - new Path(localFile.getAbsoluteFile.toURI), dfsFile) - val localFileSize = localFile.length() - logInfo(s"Copied $localFile to $dfsFile - $localFileSize bytes") - filesCopied += 1 - bytesCopied += localFileSize - - RocksDBImmutableFile(localFile.getName, dfsFileName, localFileSize) - } + val existingDfsFile = localFilesToDfsFiles.asScala.get(localFile.getName) + if (existingDfsFile.isDefined && existingDfsFile.get.sizeBytes == localFile.length()) { + val dfsFile = existingDfsFile.get + filesReused += 1 + logInfo(s"reusing file $dfsFile for $localFile") + RocksDBImmutableFile(localFile.getName, dfsFile.dfsFileName, dfsFile.sizeBytes) + } else { + val localFileName = localFile.getName + val dfsFileName = newDFSFileName(localFileName) + val dfsFile = dfsFilePath(dfsFileName) + // Note: The implementation of copyFromLocalFile() closes the output stream when there is + // any exception while copying. So this may generate partial files on DFS. But that is + // okay because until the main [version].zip file is written, those partial files are + // not going to be used at all. Eventually these files should get cleared. + fs.copyFromLocalFile( + new Path(localFile.getAbsoluteFile.toURI), dfsFile) + val localFileSize = localFile.length() + logInfo(s"Copied $localFile to $dfsFile - $localFileSize bytes") + filesCopied += 1 + bytesCopied += localFileSize + + val immutableDfsFile = RocksDBImmutableFile(localFile.getName, dfsFileName, localFileSize) + localFilesToDfsFiles.put(localFileName, immutableDfsFile) + + immutableDfsFile + } } logInfo(s"Copied $filesCopied files ($bytesCopied bytes) from local to" + s" DFS for version $version. 
$filesReused files reused without copying.") versionToRocksDBFiles.put(version, immutableFiles) + // clean up deleted SST files from the localFilesToDfsFiles Map + val currentLocalFiles = localFiles.map(_.getName).toSet + val mappingsToClean = localFilesToDfsFiles.asScala + .keys + .filterNot(currentLocalFiles.contains) + + mappingsToClean.foreach { f => + logInfo(s"cleaning $f from the localFilesToDfsFiles map") + localFilesToDfsFiles.remove(f) + } + saveCheckpointMetrics = RocksDBFileManagerMetrics( bytesCopied = bytesCopied, filesCopied = filesCopied, @@ -506,11 +526,22 @@ class RocksDBFileManager( // Delete unnecessary local immutable files listRocksDBFiles(localDir)._1 .foreach { existingFile => - val isSameFile = - requiredFileNameToFileDetails.get(existingFile.getName).exists(_.isSameFile(existingFile)) + val requiredFile = requiredFileNameToFileDetails.get(existingFile.getName) + val prevDfsFile = localFilesToDfsFiles.asScala.get(existingFile.getName) + val isSameFile = if (requiredFile.isDefined && prevDfsFile.isDefined) { + requiredFile.get.dfsFileName == prevDfsFile.get.dfsFileName && + existingFile.length() == requiredFile.get.sizeBytes + } else { + false + } + if (!isSameFile) { existingFile.delete() - logInfo(s"Deleted local file $existingFile") + localFilesToDfsFiles.remove(existingFile.getName) + logInfo(s"Deleted local file $existingFile with size ${existingFile.length()} mapped" + + s" to previous dfsFile ${prevDfsFile.getOrElse("null")}") + } else { + logInfo(s"reusing $prevDfsFile present at $existingFile for $requiredFile") } } @@ -536,6 +567,7 @@ class RocksDBFileManager( } filesCopied += 1 bytesCopied += localFileSize + localFilesToDfsFiles.put(localFileName, file) logInfo(s"Copied $dfsFile to $localFile - $localFileSize bytes") } else { filesReused += 1 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala index 91dd858220717..04b11dfe43f0c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/RocksDBSuite.scala @@ -24,16 +24,36 @@ import scala.language.implicitConversions import org.apache.commons.io.FileUtils import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.Path import org.scalactic.source.Position import org.scalatest.Tag import org.apache.spark.sql.catalyst.util.quietly -import org.apache.spark.sql.execution.streaming.CreateAtomicTestManager +import org.apache.spark.sql.execution.streaming.{CreateAtomicTestManager, FileSystemBasedCheckpointFileManager} +import org.apache.spark.sql.execution.streaming.CheckpointFileManager.{CancellableFSDataOutputStream, RenameBasedFSDataOutputStream} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.SQLConf.STREAMING_CHECKPOINT_FILE_MANAGER_CLASS import org.apache.spark.sql.test.{SharedSparkSession, SQLTestUtils} import org.apache.spark.tags.SlowSQLTest import org.apache.spark.util.{ThreadUtils, Utils} +class NoOverwriteFileSystemBasedCheckpointFileManager(path: Path, hadoopConf: Configuration) + extends FileSystemBasedCheckpointFileManager(path, hadoopConf) { + + override def createAtomic(path: Path, + overwriteIfPossible: Boolean): CancellableFSDataOutputStream = { + new RenameBasedFSDataOutputStream(this, path, overwriteIfPossible) + } + + override def renameTempFile(srcPath: Path, dstPath: Path, + 
overwriteIfPossible: Boolean): Unit = { + if (!fs.exists(dstPath)) { + // only write if a file does not exist at this location + super.renameTempFile(srcPath, dstPath, overwriteIfPossible) + } + } +} + trait RocksDBStateStoreChangelogCheckpointingTestUtil { val rocksdbChangelogCheckpointingConfKey: String = RocksDBConf.ROCKSDB_SQL_CONF_NAME_PREFIX + ".changelogCheckpointing.enabled" @@ -666,19 +686,19 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared // Save SAME version again with different checkpoint files and load back again to verify // whether files were overwritten. val cpFiles1_ = Seq( - "sst-file1.sst" -> 10, // same SST file as before, but same version, so should get copied + "sst-file1.sst" -> 10, // same SST file as before, this should get reused "sst-file2.sst" -> 25, // new SST file with same name as before, but different length "sst-file3.sst" -> 30, // new SST file "other-file1" -> 100, // same non-SST file as before, should not get copied "other-file2" -> 210, // new non-SST file with same name as before, but different length "other-file3" -> 300, // new non-SST file - "archive/00001.log" -> 1000, // same log file as before and version, so should get copied + "archive/00001.log" -> 1000, // same log file as before, this should get reused "archive/00002.log" -> 2500, // new log file with same name as before, but different length "archive/00003.log" -> 3000 // new log file ) saveCheckpointFiles(fileManager, cpFiles1_, version = 1, numKeys = 1001) - assert(numRemoteSSTFiles === 5, "shouldn't copy same files again") // 2 old + 3 new SST files - assert(numRemoteLogFiles === 5, "shouldn't copy same files again") // 2 old + 3 new log files + assert(numRemoteSSTFiles === 4, "shouldn't copy same files again") // 2 old + 2 new SST files + assert(numRemoteLogFiles === 4, "shouldn't copy same files again") // 2 old + 2 new log files loadAndVerifyCheckpointFiles(fileManager, verificationDir, version = 1, cpFiles1_, 1001) // Save another version and verify @@ -688,8 +708,8 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared "archive/00004.log" -> 4000 ) saveCheckpointFiles(fileManager, cpFiles2, version = 2, numKeys = 1501) - assert(numRemoteSSTFiles === 6) // 1 new file over earlier 5 files - assert(numRemoteLogFiles === 6) // 1 new file over earlier 5 files + assert(numRemoteSSTFiles === 5) // 1 new file over earlier 4 files + assert(numRemoteLogFiles === 5) // 1 new file over earlier 4 files loadAndVerifyCheckpointFiles(fileManager, verificationDir, version = 2, cpFiles2, 1501) // Loading an older version should work @@ -1152,6 +1172,286 @@ class RocksDBSuite extends AlsoTestWithChangelogCheckpointingEnabled with Shared } } + test("time travel - validate successful RocksDB load") { + val remoteDir = Utils.createTempDir().toString + val conf = dbConf.copy(minDeltasForSnapshot = 1, compactOnCommit = false) + new File(remoteDir).delete() // to make sure that the directory gets created + withDB(remoteDir, conf = conf) { db => + for (version <- 0 to 1) { + db.load(version) + db.put(version.toString, version.toString) + db.commit() + } + // upload snapshot 2.zip + db.doMaintenance() + for (version <- Seq(2)) { + db.load(version) + db.put(version.toString, version.toString) + db.commit() + } + // upload snapshot 3.zip + db.doMaintenance() + // simulate db in another executor that override the zip file + withDB(remoteDir, conf = conf) { db1 => + for (version <- 0 to 1) { + db1.load(version) + db1.put(version.toString, 
version.toString) + db1.commit() + } + db1.doMaintenance() + } + db.load(2) + for (version <- Seq(2)) { + db.load(version) + db.put(version.toString, version.toString) + db.commit() + } + // upload snapshot 3.zip + db.doMaintenance() + // rollback to version 2 + db.load(2) + } + } + + test("time travel 2 - validate successful RocksDB load") { + Seq(1, 2).map(minDeltasForSnapshot => { + val remoteDir = Utils.createTempDir().toString + val conf = dbConf.copy(minDeltasForSnapshot = minDeltasForSnapshot, + compactOnCommit = false) + new File(remoteDir).delete() // to make sure that the directory gets created + withDB(remoteDir, conf = conf) { db => + for (version <- 0 to 1) { + db.load(version) + db.put(version.toString, version.toString) + db.commit() + } + // upload snapshot 2.zip + db.doMaintenance() + for (version <- 2 to 3) { + db.load(version) + db.put(version.toString, version.toString) + db.commit() + } + db.load(0) + // simulate db in another executor that override the zip file + withDB(remoteDir, conf = conf) { db1 => + for (version <- 0 to 1) { + db1.load(version) + db1.put(version.toString, version.toString) + db1.commit() + } + db1.doMaintenance() + } + for (version <- 2 to 3) { + db.load(version) + db.put(version.toString, version.toString) + db.commit() + } + // upload snapshot 4.zip + db.doMaintenance() + } + withDB(remoteDir, version = 4, conf = conf) { db => + } + }) + } + + test("time travel 3 - validate successful RocksDB load") { + val remoteDir = Utils.createTempDir().toString + val conf = dbConf.copy(minDeltasForSnapshot = 0, compactOnCommit = false) + new File(remoteDir).delete() // to make sure that the directory gets created + withDB(remoteDir, conf = conf) { db => + for (version <- 0 to 2) { + db.load(version) + db.put(version.toString, version.toString) + db.commit() + } + // upload snapshot 2.zip + db.doMaintenance() + for (version <- 1 to 3) { + db.load(version) + db.put(version.toString, version.toString) + db.commit() + } + // upload snapshot 4.zip + db.doMaintenance() + } + + withDB(remoteDir, version = 4, conf = conf) { db => + } + } + + test("validate Rocks DB SST files do not have a VersionIdMismatch" + + " when metadata file is not overwritten - scenario 1") { + val fmClass = "org.apache.spark.sql.execution.streaming.state." 
+ + "NoOverwriteFileSystemBasedCheckpointFileManager" + withTempDir { dir => + val dbConf = RocksDBConf(StateStoreConf(new SQLConf())) + val hadoopConf = new Configuration() + hadoopConf.set(STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key, fmClass) + + val remoteDir = dir.getCanonicalPath + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db1 => + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db2 => + // commit version 1 via db1 + db1.load(0) + db1.put("a", "1") + db1.put("b", "1") + + db1.commit() + + // commit version 1 via db2 + db2.load(0) + db2.put("a", "1") + db2.put("b", "1") + + db2.commit() + + // commit version 2 via db2 + db2.load(1) + db2.put("a", "2") + db2.put("b", "2") + + db2.commit() + + // reload version 1, this should succeed + db2.load(1) + db1.load(1) + + // reload version 2, this should succeed + db2.load(2) + db1.load(2) + } + } + } + } + + test("validate Rocks DB SST files do not have a VersionIdMismatch" + + " when metadata file is overwritten - scenario 1") { + withTempDir { dir => + val dbConf = RocksDBConf(StateStoreConf(new SQLConf())) + val hadoopConf = new Configuration() + val remoteDir = dir.getCanonicalPath + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db1 => + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db2 => + // commit version 1 via db1 + db1.load(0) + db1.put("a", "1") + db1.put("b", "1") + + db1.commit() + + // commit version 1 via db2 + db2.load(0) + db2.put("a", "1") + db2.put("b", "1") + + db2.commit() + + // commit version 2 via db2 + db2.load(1) + db2.put("a", "2") + db2.put("b", "2") + + db2.commit() + + // reload version 1, this should succeed + db2.load(1) + db1.load(1) + + // reload version 2, this should succeed + db2.load(2) + db1.load(2) + } + } + } + } + + test("validate Rocks DB SST files do not have a VersionIdMismatch" + + " when metadata file is not overwritten - scenario 2") { + val fmClass = "org.apache.spark.sql.execution.streaming.state." 
+ + "NoOverwriteFileSystemBasedCheckpointFileManager" + withTempDir { dir => + val dbConf = RocksDBConf(StateStoreConf(new SQLConf())) + val hadoopConf = new Configuration() + hadoopConf.set(STREAMING_CHECKPOINT_FILE_MANAGER_CLASS.parent.key, fmClass) + + val remoteDir = dir.getCanonicalPath + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db1 => + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db2 => + // commit version 1 via db2 + db2.load(0) + db2.put("a", "1") + db2.put("b", "1") + + db2.commit() + + // commit version 1 via db1 + db1.load(0) + db1.put("a", "1") + db1.put("b", "1") + + db1.commit() + + // commit version 2 via db2 + db2.load(1) + db2.put("a", "2") + db2.put("b", "2") + + db2.commit() + + // reload version 1, this should succeed + db2.load(1) + db1.load(1) + + // reload version 2, this should succeed + db2.load(2) + db1.load(2) + } + } + } + } + + test("validate Rocks DB SST files do not have a VersionIdMismatch" + + " when metadata file is overwritten - scenario 2") { + withTempDir { dir => + val dbConf = RocksDBConf(StateStoreConf(new SQLConf())) + val hadoopConf = new Configuration() + val remoteDir = dir.getCanonicalPath + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db1 => + withDB(remoteDir, conf = dbConf, hadoopConf = hadoopConf) { db2 => + // commit version 1 via db2 + db2.load(0) + db2.put("a", "1") + db2.put("b", "1") + + db2.commit() + + // commit version 1 via db1 + db1.load(0) + db1.put("a", "1") + db1.put("b", "1") + + db1.commit() + + // commit version 2 via db2 + db2.load(1) + db2.put("a", "2") + db2.put("b", "2") + + db2.commit() + + // reload version 1, this should succeed + db2.load(1) + db1.load(1) + + // reload version 2, this should succeed + db2.load(2) + db1.load(2) + } + } + } + } + private def sqlConf = SQLConf.get.clone() private def dbConf = RocksDBConf(StateStoreConf(sqlConf)) From 125b2f87d453a16325f24e7382707f2b365bba14 Mon Sep 17 00:00:00 2001 From: fred-db Date: Thu, 25 Jan 2024 08:34:37 -0800 Subject: [PATCH 194/521] [SPARK-46861][CORE] Avoid Deadlock in DAGScheduler * The DAGScheduler could currently run into a deadlock with another thread if both access the partitions of the same RDD at the same time. * To make progress in getCacheLocs, we require both exclusive access to the RDD partitions and the location cache. We first lock on the location cache, and then on the RDD. * When accessing partitions of an RDD, the RDD first acquires exclusive access on the partitions, and then might acquire exclusive access on the location cache. * If thread 1 is able to acquire access on the RDD, while thread 2 holds the access to the location cache, we can run into a deadlock situation. * To fix this, acquire locks in the same order. Change the DAGScheduler to first acquire the lock on the RDD, and then the lock on the location cache. * This is a deadlock you can run into, which can prevent any progress on the cluster. * No * Unit test that reproduces the issue. No Closes #44882 from fred-db/fix-deadlock. 
Authored-by: fred-db Signed-off-by: Dongjoon Hyun (cherry picked from commit 617014cc92d933c70c9865a578fceb265883badd) Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/rdd/RDD.scala | 11 ++++-- .../apache/spark/scheduler/DAGScheduler.scala | 31 ++++++++------- .../spark/scheduler/DAGSchedulerSuite.scala | 38 ++++++++++++++++++- 3 files changed, 62 insertions(+), 18 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index a21d2ae773966..f695b10202758 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -223,14 +223,17 @@ abstract class RDD[T: ClassTag]( * not use `this` because RDDs are user-visible, so users might have added their own locking on * RDDs; sharing that could lead to a deadlock. * - * One thread might hold the lock on many of these, for a chain of RDD dependencies; but - * because DAGs are acyclic, and we only ever hold locks for one path in that DAG, there is no - * chance of deadlock. + * One thread might hold the lock on many of these, for a chain of RDD dependencies. Deadlocks + * are possible if we try to lock another resource while holding the stateLock, + * and the lock acquisition sequence of these locks is not guaranteed to be the same. + * This can lead lead to a deadlock as one thread might first acquire the stateLock, + * and then the resource, + * while another thread might first acquire the resource, and then the stateLock. * * Executors may reference the shared fields (though they should never mutate them, * that only happens on the driver). */ - private val stateLock = new Serializable {} + private[spark] val stateLock = new Serializable {} // Our dependencies and partitions will be gotten by calling subclass's methods below, and will // be overwritten when we're checkpointed diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index d8adaae19b90d..89d16e5793482 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -174,6 +174,9 @@ private[spark] class DAGScheduler( * locations where that RDD partition is cached. * * All accesses to this map should be guarded by synchronizing on it (see SPARK-4454). + * If you need to access any RDD while synchronizing on the cache locations, + * first synchronize on the RDD, and then synchronize on this map to avoid deadlocks. The RDD + * could try to access the cache locations after synchronizing on the RDD. */ private val cacheLocs = new HashMap[Int, IndexedSeq[Seq[TaskLocation]]] @@ -420,22 +423,24 @@ private[spark] class DAGScheduler( } private[scheduler] - def getCacheLocs(rdd: RDD[_]): IndexedSeq[Seq[TaskLocation]] = cacheLocs.synchronized { - // Note: this doesn't use `getOrElse()` because this method is called O(num tasks) times - if (!cacheLocs.contains(rdd.id)) { - // Note: if the storage level is NONE, we don't need to get locations from block manager. 
- val locs: IndexedSeq[Seq[TaskLocation]] = if (rdd.getStorageLevel == StorageLevel.NONE) { - IndexedSeq.fill(rdd.partitions.length)(Nil) - } else { - val blockIds = - rdd.partitions.indices.map(index => RDDBlockId(rdd.id, index)).toArray[BlockId] - blockManagerMaster.getLocations(blockIds).map { bms => - bms.map(bm => TaskLocation(bm.host, bm.executorId)) + def getCacheLocs(rdd: RDD[_]): IndexedSeq[Seq[TaskLocation]] = rdd.stateLock.synchronized { + cacheLocs.synchronized { + // Note: this doesn't use `getOrElse()` because this method is called O(num tasks) times + if (!cacheLocs.contains(rdd.id)) { + // Note: if the storage level is NONE, we don't need to get locations from block manager. + val locs: IndexedSeq[Seq[TaskLocation]] = if (rdd.getStorageLevel == StorageLevel.NONE) { + IndexedSeq.fill(rdd.partitions.length)(Nil) + } else { + val blockIds = + rdd.partitions.indices.map(index => RDDBlockId(rdd.id, index)).toArray[BlockId] + blockManagerMaster.getLocations(blockIds).map { bms => + bms.map(bm => TaskLocation(bm.host, bm.executorId)) + } } + cacheLocs(rdd.id) = locs } - cacheLocs(rdd.id) = locs + cacheLocs(rdd.id) } - cacheLocs(rdd.id) } private def clearCacheLocs(): Unit = cacheLocs.synchronized { diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 9b7c5d5ace314..1818bf9b152d3 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -48,7 +48,7 @@ import org.apache.spark.scheduler.SchedulingMode.SchedulingMode import org.apache.spark.scheduler.local.LocalSchedulerBackend import org.apache.spark.shuffle.{FetchFailedException, MetadataFetchFailedException} import org.apache.spark.storage.{BlockId, BlockManager, BlockManagerId, BlockManagerMaster} -import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, CallSite, Clock, LongAccumulator, SystemClock, Utils} +import org.apache.spark.util.{AccumulatorContext, AccumulatorV2, CallSite, Clock, LongAccumulator, SystemClock, ThreadUtils, Utils} class DAGSchedulerEventProcessLoopTester(dagScheduler: DAGScheduler) extends DAGSchedulerEventProcessLoop(dagScheduler) { @@ -594,6 +594,42 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti assertDataStructuresEmpty() } + // Note that this test is NOT perfectly reproducible when there is a deadlock as it uses + // Thread.sleep, but it should never fail / flake when there is no deadlock. + // If this test starts to flake, this shows that there is a deadlock! + test("No Deadlock between getCacheLocs and CoalescedRDD") { + val rdd = sc.parallelize(1 to 10, numSlices = 10) + val coalescedRDD = rdd.coalesce(2) + val executionContext = ThreadUtils.newDaemonFixedThreadPool( + nThreads = 2, "test-getCacheLocs") + // Used to only make progress on getCacheLocs after we acquired the lock to the RDD. + val rddLock = new java.util.concurrent.Semaphore(0) + val partitionsFuture = executionContext.submit(new Runnable { + override def run(): Unit = { + coalescedRDD.stateLock.synchronized { + rddLock.release(1) + // Try to access the partitions of the coalescedRDD. This will cause a call to + // getCacheLocs internally. + Thread.sleep(5000) + coalescedRDD.partitions + } + } + }) + val getCacheLocsFuture = executionContext.submit(new Runnable { + override def run(): Unit = { + rddLock.acquire() + // Access the cache locations. 
+ // If the partition location cache is locked before the stateLock is locked, + // we'll run into a deadlock. + sc.dagScheduler.getCacheLocs(coalescedRDD) + } + }) + // If any of the futures throw a TimeOutException, this shows that there is a deadlock between + // getCacheLocs and accessing partitions of an RDD. + getCacheLocsFuture.get(120, TimeUnit.SECONDS) + partitionsFuture.get(120, TimeUnit.SECONDS) + } + test("All shuffle files on the storage endpoint should be cleaned up when it is lost") { conf.set(config.SHUFFLE_SERVICE_ENABLED.key, "true") conf.set("spark.files.fetchFailure.unRegisterOutputOnHost", "true") From e5a654e818b4698260807a081e5cf3d71480ac13 Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Thu, 25 Jan 2024 22:35:38 -0800 Subject: [PATCH 195/521] [SPARK-46855][INFRA][3.5] Add `sketch` to the dependencies of the `catalyst` in `module.py` ### What changes were proposed in this pull request? This pr add `sketch` to the dependencies of the `catalyst` module in `module.py` due to `sketch` is direct dependency of `catalyst` module. ### Why are the changes needed? Ensure that when modifying the `sketch` module, both `catalyst` and cascading modules will trigger tests. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions ### Was this patch authored or co-authored using generative AI tooling? No Closes #44893 from LuciferYang/SPARK-46855-35. Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- dev/sparktestsupport/modules.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 33d253a47ea07..d29fc8726018d 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -168,6 +168,15 @@ def __hash__(self): ], ) +sketch = Module( + name="sketch", + dependencies=[tags], + source_file_regexes=[ + "common/sketch/", + ], + sbt_test_goals=["sketch/test"], +) + core = Module( name="core", dependencies=[kvstore, network_common, network_shuffle, unsafe, launcher], @@ -181,7 +190,7 @@ def __hash__(self): catalyst = Module( name="catalyst", - dependencies=[tags, core], + dependencies=[tags, sketch, core], source_file_regexes=[ "sql/catalyst/", ], @@ -295,15 +304,6 @@ def __hash__(self): ], ) -sketch = Module( - name="sketch", - dependencies=[tags], - source_file_regexes=[ - "common/sketch/", - ], - sbt_test_goals=["sketch/test"], -) - graphx = Module( name="graphx", dependencies=[tags, core], From cf4e8672dd1a4e6309bb530191be7d92a793cb28 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Fri, 26 Jan 2024 11:02:15 +0300 Subject: [PATCH 196/521] [SPARK-46862][SQL] Disable CSV column pruning in the multi-line mode ### What changes were proposed in this pull request? In the PR, I propose to disable the column pruning feature in the CSV datasource for the `multiLine` mode. ### Why are the changes needed? To workaround the issue in the `uniVocity` parser used by the CSV datasource: https://github.com/uniVocity/univocity-parsers/issues/529 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *CSVv1Suite" $ build/sbt "test:testOnly *CSVv2Suite" $ build/sbt "test:testOnly *CSVLegacyTimeParserSuite" $ build/sbt "testOnly *.CsvFunctionsSuite" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44872 from MaxGekk/csv-disable-column-pruning. 
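For illustration only, a self-contained usage sketch of the affected read path (the sample data and temp file are made up): `multiLine` with quoted embedded newlines, where `count()` requires no columns and therefore exercises the pruned-schema shape that hit the uniVocity issue.

```
import java.nio.charset.StandardCharsets
import java.nio.file.Files

import org.apache.spark.sql.SparkSession

object MultiLineCsvCountSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("csv-multiline").getOrCreate()
    // One record whose quoted field contains an embedded newline, so multiLine mode is required.
    val data = "\"id\",\"name\"\n\"1\",\"line1\nline2\"\n\"2\",\"plain\"\n"
    val path = Files.createTempFile("multiline-", ".csv")
    Files.write(path, data.getBytes(StandardCharsets.UTF_8))

    val df = spark.read
      .option("multiLine", true)
      .option("header", true)
      .option("escape", "\"")
      .csv(path.toString)
    println(df.count()) // expected: 2, with column pruning disabled in multiLine mode
    spark.stop()
  }
}
```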
Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 829e742df8251c6f5e965cb08ad454ac3ee1a389) Signed-off-by: Max Gekk --- .../spark/sql/catalyst/csv/CSVOptions.scala | 10 ++++++++ .../sql/catalyst/csv/UnivocityParser.scala | 2 +- .../v2/csv/CSVPartitionReaderFactory.scala | 2 +- .../execution/datasources/csv/CSVSuite.scala | 25 ++++++++++++++++++- 4 files changed, 36 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala index 845c815c5648b..c5a6bf5076dec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala @@ -277,6 +277,15 @@ class CSVOptions( val unescapedQuoteHandling: UnescapedQuoteHandling = UnescapedQuoteHandling.valueOf(parameters .getOrElse(UNESCAPED_QUOTE_HANDLING, "STOP_AT_DELIMITER").toUpperCase(Locale.ROOT)) + /** + * The column pruning feature can be enabled either via the CSV option `columnPruning` or + * in non-multiline mode via initialization of CSV options by the SQL config: + * `spark.sql.csv.parser.columnPruning.enabled`. + * The feature is disabled in the `multiLine` mode because of the issue: + * https://github.com/uniVocity/univocity-parsers/issues/529 + */ + val isColumnPruningEnabled: Boolean = getBool(COLUMN_PRUNING, !multiLine && columnPruning) + def asWriterSettings: CsvWriterSettings = { val writerSettings = new CsvWriterSettings() val format = writerSettings.getFormat @@ -376,4 +385,5 @@ object CSVOptions extends DataSourceOptions { val SEP = "sep" val DELIMITER = "delimiter" newOption(SEP, DELIMITER) + val COLUMN_PRUNING = newOption("columnPruning") } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala index b99ee630d4b22..804c5d358ad6c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala @@ -72,7 +72,7 @@ class UnivocityParser( // positions. Generally assigned by input configuration options, except when input column(s) have // default values, in which case we omit the explicit indexes in order to know how many tokens // were present in each line instead. 
- private def columnPruning: Boolean = options.columnPruning && + private def columnPruning: Boolean = options.isColumnPruningEnabled && !requiredSchema.exists(_.metadata.contains(EXISTS_DEFAULT_COLUMN_METADATA_KEY)) // When column pruning is enabled, the parser only parses the required columns based on diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala index 37f6ae4aaa9fc..cef5a71ca9c60 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/csv/CSVPartitionReaderFactory.scala @@ -58,7 +58,7 @@ case class CSVPartitionReaderFactory( actualReadDataSchema, options, filters) - val schema = if (options.columnPruning) actualReadDataSchema else actualDataSchema + val schema = if (options.isColumnPruningEnabled) actualReadDataSchema else actualDataSchema val isStartOfFile = file.start == 0 val headerChecker = new CSVHeaderChecker( schema, options, source = s"CSV file: ${file.urlEncodedPath}", isStartOfFile) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 78266acfd7de9..6690bf101fa7a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -2079,6 +2079,7 @@ abstract class CSVSuite .option("header", true) .option("enforceSchema", false) .option("multiLine", multiLine) + .option("columnPruning", true) .load(dir) .select("columnA"), Row("a")) @@ -2089,6 +2090,7 @@ abstract class CSVSuite .option("header", true) .option("enforceSchema", false) .option("multiLine", multiLine) + .option("columnPruning", true) .load(dir) .count() === 1L) } @@ -3151,7 +3153,7 @@ abstract class CSVSuite } test("SPARK-40667: validate CSV Options") { - assert(CSVOptions.getAllOptions.size == 38) + assert(CSVOptions.getAllOptions.size == 39) // Please add validation on any new CSV options here assert(CSVOptions.isValidOption("header")) assert(CSVOptions.isValidOption("inferSchema")) @@ -3191,6 +3193,7 @@ abstract class CSVSuite assert(CSVOptions.isValidOption("codec")) assert(CSVOptions.isValidOption("sep")) assert(CSVOptions.isValidOption("delimiter")) + assert(CSVOptions.isValidOption("columnPruning")) // Please add validation on any new parquet options with alternative here assert(CSVOptions.getAlternativeOption("sep").contains("delimiter")) assert(CSVOptions.getAlternativeOption("delimiter").contains("sep")) @@ -3200,6 +3203,26 @@ abstract class CSVSuite assert(CSVOptions.getAlternativeOption("codec").contains("compression")) assert(CSVOptions.getAlternativeOption("preferDate").isEmpty) } + + test("SPARK-46862: column pruning in the multi-line mode") { + val data = + """"jobID","Name","City","Active" + |"1","DE","","Yes" + |"5",",","","," + |"3","SA","","No" + |"10","abcd""efgh"" \ndef","","" + |"8","SE","","No"""".stripMargin + + withTempPath { path => + Files.write(path.toPath, data.getBytes(StandardCharsets.UTF_8)) + val df = spark.read + .option("multiline", "true") + .option("header", "true") + .option("escape", "\"") + .csv(path.getCanonicalPath) + assert(df.count() === 5) + } + } } class CSVv1Suite extends CSVSuite { From 
a2854ba5d852e2001b96636a8964494c45fc27d3 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Sat, 27 Jan 2024 19:22:52 +0300 Subject: [PATCH 197/521] [SPARK-46862][SQL][FOLLOWUP] Fix column pruning without schema enforcing in V1 CSV datasource ### What changes were proposed in this pull request? In the PR, I propose to invoke `CSVOptons.isColumnPruningEnabled` introduced by https://github.com/apache/spark/pull/44872 while matching of CSV header to a schema in the V1 CSV datasource. ### Why are the changes needed? To fix the failure when column pruning happens and a schema is not enforced: ```scala scala> spark.read. | option("multiLine", true). | option("header", true). | option("escape", "\""). | option("enforceSchema", false). | csv("/Users/maximgekk/tmp/es-939111-data.csv"). | count() 24/01/27 12:43:14 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 3) java.lang.IllegalArgumentException: Number of column in CSV header is not equal to number of fields in the schema: Header length: 4, schema size: 0 CSV file: file:///Users/maximgekk/tmp/es-939111-data.csv ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *CSVv1Suite" $ build/sbt "test:testOnly *CSVv2Suite" $ build/sbt "test:testOnly *CSVLegacyTimeParserSuite" $ build/sbt "testOnly *.CsvFunctionsSuite" ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44910 from MaxGekk/check-header-column-pruning. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit bc51c9fea3645c6ae1d9e1e83b0f94f8b849be20) Signed-off-by: Max Gekk --- .../execution/datasources/csv/CSVFileFormat.scala | 6 +++--- .../sql/execution/datasources/csv/CSVSuite.scala | 15 +++++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala index 069ad9562a7d5..0ff96f073f03b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala @@ -100,12 +100,12 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister { hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow] = { val broadcastedHadoopConf = sparkSession.sparkContext.broadcast(new SerializableConfiguration(hadoopConf)) - val columnPruning = sparkSession.sessionState.conf.csvColumnPruning val parsedOptions = new CSVOptions( options, - columnPruning, + sparkSession.sessionState.conf.csvColumnPruning, sparkSession.sessionState.conf.sessionLocalTimeZone, sparkSession.sessionState.conf.columnNameOfCorruptRecord) + val isColumnPruningEnabled = parsedOptions.isColumnPruningEnabled // Check a field requirement for corrupt records here to throw an exception in a driver side ExprUtils.verifyColumnNameOfCorruptRecord(dataSchema, parsedOptions.columnNameOfCorruptRecord) @@ -125,7 +125,7 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister { actualRequiredSchema, parsedOptions, actualFilters) - val schema = if (columnPruning) actualRequiredSchema else actualDataSchema + val schema = if (isColumnPruningEnabled) actualRequiredSchema else actualDataSchema val isStartOfFile = file.start == 0 val headerChecker = new CSVHeaderChecker( schema, parsedOptions, source = s"CSV file: 
${file.urlEncodedPath}", isStartOfFile) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 6690bf101fa7a..a91adb787838e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -3215,12 +3215,15 @@ abstract class CSVSuite withTempPath { path => Files.write(path.toPath, data.getBytes(StandardCharsets.UTF_8)) - val df = spark.read - .option("multiline", "true") - .option("header", "true") - .option("escape", "\"") - .csv(path.getCanonicalPath) - assert(df.count() === 5) + Seq(true, false).foreach { enforceSchema => + val df = spark.read + .option("multiLine", true) + .option("header", true) + .option("escape", "\"") + .option("enforceSchema", enforceSchema) + .csv(path.getCanonicalPath) + assert(df.count() === 5) + } } } } From accfb39e4ddf7f7b54396bd0e35256a04461c693 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 27 Jan 2024 20:24:15 -0800 Subject: [PATCH 198/521] [SPARK-46888][CORE] Fix `Master` to reject `/workers/kill/` requests if decommission is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR aims to fix `Master` to reject `/workers/kill/` request if `spark.decommission.enabled` is `false` in order to fix the dangling worker issue. Currently, `spark.decommission.enabled` is `false` by default. So, when a user asks to decommission, only Master marked it `DECOMMISSIONED` while the worker is alive. ``` $ curl -XPOST http://localhost:8080/workers/kill/\?host\=127.0.0.1 ``` **Master UI** ![Screenshot 2024-01-27 at 6 19 18 PM](https://github.com/apache/spark/assets/9700541/443bfc32-b924-438a-8bf6-c64b9afbc4be) **Worker Log** ``` 24/01/27 18:18:06 WARN Worker: Receive decommission request, but decommission feature is disabled. ``` To be consistent with the existing `Worker` behavior which ignores the request. https://github.com/apache/spark/blob/1787a5261e87e0214a3f803f6534c5e52a0138e6/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala#L859-L868 No, this is a bug fix. Pass the CI with the newly added test case. No. Closes #44915 from dongjoon-hyun/SPARK-46888. 
Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 20b593811dc02c96c71978851e051d32bf8c3496) Signed-off-by: Dongjoon Hyun --- .../spark/deploy/master/ui/MasterWebUI.scala | 4 +++- .../spark/deploy/master/MasterSuite.scala | 21 +++++++++++++++++++ .../deploy/master/ui/MasterWebUISuite.scala | 3 ++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index af94bd6d9e0f2..53e5c5ac2a8f0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -23,6 +23,7 @@ import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse} import org.apache.spark.deploy.DeployMessages.{DecommissionWorkersOnHosts, MasterStateResponse, RequestMasterState} import org.apache.spark.deploy.master.Master import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.DECOMMISSION_ENABLED import org.apache.spark.internal.config.UI.MASTER_UI_DECOMMISSION_ALLOW_MODE import org.apache.spark.internal.config.UI.UI_KILL_ENABLED import org.apache.spark.ui.{SparkUI, WebUI} @@ -40,6 +41,7 @@ class MasterWebUI( val masterEndpointRef = master.self val killEnabled = master.conf.get(UI_KILL_ENABLED) + val decommissionDisabled = !master.conf.get(DECOMMISSION_ENABLED) val decommissionAllowMode = master.conf.get(MASTER_UI_DECOMMISSION_ALLOW_MODE) initialize() @@ -58,7 +60,7 @@ class MasterWebUI( override def doPost(req: HttpServletRequest, resp: HttpServletResponse): Unit = { val hostnames: Seq[String] = Option(req.getParameterValues("host")) .getOrElse(Array[String]()).toSeq - if (!isDecommissioningRequestAllowed(req)) { + if (decommissionDisabled || !isDecommissioningRequestAllowed(req)) { resp.sendError(HttpServletResponse.SC_METHOD_NOT_ALLOWED) } else { val removedWorkers = masterEndpointRef.askSync[Integer]( diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 1cec863b1e7f9..37874de987662 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.deploy.master +import java.net.{HttpURLConnection, URL} import java.util.Date import java.util.concurrent.{ConcurrentLinkedQueue, CountDownLatch, TimeUnit} import java.util.concurrent.atomic.AtomicInteger @@ -325,6 +326,26 @@ class MasterSuite extends SparkFunSuite } } + test("SPARK-46888: master should reject worker kill request if decommision is disabled") { + implicit val formats = org.json4s.DefaultFormats + val conf = new SparkConf() + .set(DECOMMISSION_ENABLED, false) + .set(MASTER_UI_DECOMMISSION_ALLOW_MODE, "ALLOW") + val localCluster = LocalSparkCluster(1, 1, 512, conf) + localCluster.start() + val masterUrl = s"http://${Utils.localHostNameForURI()}:${localCluster.masterWebUIPort}" + try { + eventually(timeout(30.seconds), interval(100.milliseconds)) { + val url = new URL(s"$masterUrl/workers/kill/?host=${Utils.localHostNameForURI()}") + val conn = url.openConnection().asInstanceOf[HttpURLConnection] + conn.setRequestMethod("POST") + assert(conn.getResponseCode === 405) + } + } finally { + localCluster.stop() + } + } + test("master/worker web ui available") { implicit val formats = org.json4s.DefaultFormats val 
conf = new SparkConf() diff --git a/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala index 024511189accc..40265a12af93b 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/ui/MasterWebUISuite.scala @@ -30,12 +30,13 @@ import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.DeployMessages.{DecommissionWorkersOnHosts, KillDriverResponse, RequestKillDriver} import org.apache.spark.deploy.DeployTestUtils._ import org.apache.spark.deploy.master._ +import org.apache.spark.internal.config.DECOMMISSION_ENABLED import org.apache.spark.rpc.{RpcEndpointRef, RpcEnv} import org.apache.spark.util.Utils class MasterWebUISuite extends SparkFunSuite { - val conf = new SparkConf() + val conf = new SparkConf().set(DECOMMISSION_ENABLED, true) val securityMgr = new SecurityManager(conf) val rpcEnv = mock(classOf[RpcEnv]) val master = mock(classOf[Master]) From 343ae822616185022570f1c14b151e54ff54e265 Mon Sep 17 00:00:00 2001 From: Willi Raschkowski Date: Mon, 29 Jan 2024 22:43:21 -0800 Subject: [PATCH 199/521] [SPARK-46893][UI] Remove inline scripts from UI descriptions ### What changes were proposed in this pull request? This PR prevents malicious users from injecting inline scripts via job and stage descriptions. Spark's Web UI [already checks the security of job and stage descriptions](https://github.com/apache/spark/blob/a368280708dd3c6eb90bd3b09a36a68bdd096222/core/src/main/scala/org/apache/spark/ui/UIUtils.scala#L528-L545) before rendering them as HTML (or treating them as plain text). The UI already disallows `
<script>` tags, but inline scripts could still reach the rendered page through descriptions; this change removes inline scripts from job and stage descriptions as well.

[Only fragments of the remaining hunks are recoverable: a YARN shuffle service property table with a "Since Version" column (`spark.yarn.shuffle.stopOnFailure`, default `false`, since 2.1.0; `spark.yarn.shuffle.service.metrics.namespace`, since 3.2.0; `spark.yarn.shuffle.service.logs.namespace`, since 3.3.0; `spark.shuffle.service.db.backend`); a driver-row rendering change around `UIUtils.formatDuration(System.currentTimeMillis() - driver.startTime)`; and the Avro doc row for the write compression codec (default `snappy`, since 2.4.0), whose supported codec list changes from `uncompressed`, `deflate`, `snappy`, `bzip2`, `xz` to additionally include `zstandard`.]
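A usage sketch for that last Avro doc row (which matches the `spark.sql.avro.compression.codec` conf; the sketch assumes spark-avro is on the classpath and that `zstandard` is accepted as the updated row states; paths below are illustrative):

```scala
import org.apache.spark.sql.SparkSession

object AvroCodecExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("avro-codec-example")
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    // Session-wide default for Avro writes (default "snappy" since 2.4.0).
    spark.conf.set("spark.sql.avro.compression.codec", "zstandard")

    val df = Seq((1, "a"), (2, "b")).toDF("id", "value")

    df.write
      .format("avro")
      .option("compression", "zstandard") // per-write override of the SQL conf
      .mode("overwrite")
      .save("/tmp/avro-codec-example")

    spark.stop()
  }
}
```

When both are set, the per-write `compression` option takes precedence over the session conf.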