From 6b9b3c04f9a78a903b05d81209b41836e3061523 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Fri, 18 Oct 2024 10:46:06 +0200 Subject: [PATCH] Revert "[SPARK-49909][SQL][3.5] Fix the pretty name of some expressions" ### What changes were proposed in this pull request? The pr aims to revert https://github.com/apache/spark/pull/48393. This reverts commit 4472fb26ec4af3398389cd4f96bc9d94663895a9. ### Why are the changes needed? Only revert. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48531 from panbingkun/branch-3.5_SPARK-49909_revert. Authored-by: panbingkun Signed-off-by: Max Gekk --- .../function_array_agg.explain | 2 +- .../explain-results/function_curdate.explain | 2 +- .../function_current_schema.explain | 2 +- .../explain-results/function_dateadd.explain | 2 +- .../function_random_with_seed.explain | 2 +- .../function_to_varchar.explain | 2 +- python/pyspark/sql/functions.py | 30 +++++++++---------- .../expressions/aggregate/collect.scala | 5 ++-- .../expressions/datetimeExpressions.scala | 5 ++-- .../spark/sql/catalyst/expressions/misc.scala | 5 ++-- .../expressions/numberFormatExpressions.scala | 7 ++--- .../expressions/randomExpressions.scala | 8 ++--- .../sql-functions/sql-expression-schema.md | 20 ++++++------- .../analyzer-results/charvarchar.sql.out | 6 ++-- .../current_database_catalog.sql.out | 2 +- .../analyzer-results/group-by.sql.out | 4 +-- .../sql-tests/results/charvarchar.sql.out | 6 ++-- .../results/current_database_catalog.sql.out | 2 +- .../sql-tests/results/group-by.sql.out | 4 +-- .../results/subexp-elimination.sql.out | 6 ++-- 20 files changed, 56 insertions(+), 66 deletions(-) diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_agg.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_agg.explain index 6668692f6cf1d..102f736c62ef6 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_agg.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_array_agg.explain @@ -1,2 +1,2 @@ -Aggregate [array_agg(a#0, 0, 0) AS array_agg(a)#0] +Aggregate [collect_list(a#0, 0, 0) AS collect_list(a)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_curdate.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_curdate.explain index be039d62a5494..5305b346c4f2d 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_curdate.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_curdate.explain @@ -1,2 +1,2 @@ -Project [curdate(Some(America/Los_Angeles)) AS curdate()#0] +Project [current_date(Some(America/Los_Angeles)) AS current_date()#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_current_schema.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_current_schema.explain index 481c0a478c8df..93dfac524d9a1 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_current_schema.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_current_schema.explain @@ -1,2 +1,2 @@ -Project [current_schema() AS current_schema()#0] +Project [current_database() AS current_database()#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_dateadd.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_dateadd.explain index 319428541760d..66325085b9c14 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_dateadd.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_dateadd.explain @@ -1,2 +1,2 @@ -Project [dateadd(d#0, 2) AS dateadd(d, 2)#0] +Project [date_add(d#0, 2) AS date_add(d, 2)#0] +- LocalRelation , [d#0, t#0, s#0, x#0L, wt#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_random_with_seed.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_random_with_seed.explain index 5854d2c7fa6be..81c81e95c2bdd 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_random_with_seed.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_random_with_seed.explain @@ -1,2 +1,2 @@ -Project [random(1) AS random(1)#0] +Project [random(1) AS rand(1)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_varchar.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_varchar.explain index cc5149bfed863..f0d9cacc61ac5 100644 --- a/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_varchar.explain +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_to_varchar.explain @@ -1,2 +1,2 @@ -Project [to_varchar(cast(b#0 as decimal(30,15)), $99.99) AS to_varchar(b, $99.99)#0] +Project [to_char(cast(b#0 as decimal(30,15)), $99.99) AS to_char(b, $99.99)#0] +- LocalRelation , [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 173f4da59f184..7e1a8faf00178 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -6710,31 +6710,31 @@ def dateadd(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: >>> spark.createDataFrame( ... [('2015-04-08', 2,)], ['dt', 'add'] ... ).select(sf.dateadd("dt", 1)).show() - +--------------+ - |dateadd(dt, 1)| - +--------------+ - | 2015-04-09| - +--------------+ + +---------------+ + |date_add(dt, 1)| + +---------------+ + | 2015-04-09| + +---------------+ >>> import pyspark.sql.functions as sf >>> spark.createDataFrame( ... [('2015-04-08', 2,)], ['dt', 'add'] ... ).select(sf.dateadd("dt", sf.lit(2))).show() - +--------------+ - |dateadd(dt, 2)| - +--------------+ - | 2015-04-10| - +--------------+ + +---------------+ + |date_add(dt, 2)| + +---------------+ + | 2015-04-10| + +---------------+ >>> import pyspark.sql.functions as sf >>> spark.createDataFrame( ... [('2015-04-08', 2,)], ['dt', 'add'] ... ).select(sf.dateadd("dt", -1)).show() - +---------------+ - |dateadd(dt, -1)| - +---------------+ - | 2015-04-07| - +---------------+ + +----------------+ + |date_add(dt, -1)| + +----------------+ + | 2015-04-07| + +----------------+ """ days = lit(days) if isinstance(days, int) else days return _invoke_function_over_columns("dateadd", start, days) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala index 770d9c281fefb..7bbc930ceab59 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/collect.scala @@ -21,7 +21,7 @@ import scala.collection.generic.Growable import scala.collection.mutable import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult} +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.trees.UnaryLike @@ -118,8 +118,7 @@ case class CollectList( override def createAggregationBuffer(): mutable.ArrayBuffer[Any] = mutable.ArrayBuffer.empty - override def prettyName: String = - getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("collect_list") + override def prettyName: String = "collect_list" override def eval(buffer: mutable.ArrayBuffer[Any]): Any = { new GenericArrayData(buffer.toArray) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 9511df50a2652..51ddf2b85f8c2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -150,8 +150,7 @@ case class CurrentDate(timeZoneId: Option[String] = None) override def eval(input: InternalRow): Any = currentDate(zoneId) - override def prettyName: String = - getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_date") + override def prettyName: String = "current_date" } // scalastyle:off line.size.limit @@ -341,7 +340,7 @@ case class DateAdd(startDate: Expression, days: Expression) }) } - override def prettyName: String = getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("date_add") + override def prettyName: String = "date_add" override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): DateAdd = copy(startDate = newLeft, days = newRight) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index 12bdefc832740..92ed08435216b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.{SPARK_REVISION, SPARK_VERSION_SHORT} import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, UnresolvedSeed} +import org.apache.spark.sql.catalyst.analysis.UnresolvedSeed import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke @@ -164,8 +164,7 @@ object AssertTrue { case class CurrentDatabase() extends LeafExpression with Unevaluable { override def dataType: DataType = StringType override def nullable: Boolean = false - override def prettyName: String = - getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_database") + override def prettyName: String = "current_database" final override val nodePatterns: Seq[TreePattern] = Seq(CURRENT_LIKE) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala index 17ada5a6f3d77..9dcca65efe5a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/numberFormatExpressions.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import java.util.Locale -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, TypeCheckResult} +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.DataTypeMismatch import org.apache.spark.sql.catalyst.expressions.Cast._ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode} @@ -257,10 +257,7 @@ case class ToCharacter(left: Expression, right: Expression) inputTypeCheck } } - - override def prettyName: String = - getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("to_char") - + override def prettyName: String = "to_char" override def nullSafeEval(decimal: Any, format: Any): Any = { val input = decimal.asInstanceOf[Decimal] numberFormatter.format(input) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala index 17c5362f98f46..db78415a0cc54 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/randomExpressions.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.InternalRow -import org.apache.spark.sql.catalyst.analysis.{FunctionRegistry, UnresolvedSeed} +import org.apache.spark.sql.catalyst.analysis.UnresolvedSeed import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodeGenerator, ExprCode, FalseLiteral} import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.trees.TreePattern.{EXPRESSION_WITH_RANDOM_SEED, TreePattern} @@ -111,12 +111,8 @@ case class Rand(child: Expression, hideSeed: Boolean = false) extends RDG { } override def flatArguments: Iterator[Any] = Iterator(child) - - override def prettyName: String = - getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("rand") - override def sql: String = { - s"$prettyName(${if (hideSeed) "" else child.sql})" + s"rand(${if (hideSeed) "" else child.sql})" } override protected def withNewChildInternal(newChild: Expression): Rand = copy(child = newChild) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 66e5b08d171fa..71fde8c7268cc 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -96,17 +96,17 @@ | org.apache.spark.sql.catalyst.expressions.Csc | csc | SELECT csc(1) | struct | | org.apache.spark.sql.catalyst.expressions.CsvToStructs | from_csv | SELECT from_csv('1, 0.8', 'a INT, b DOUBLE') | struct> | | org.apache.spark.sql.catalyst.expressions.CumeDist | cume_dist | SELECT a, b, cume_dist() OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | -| org.apache.spark.sql.catalyst.expressions.CurDateExpressionBuilder | curdate | SELECT curdate() | struct | +| org.apache.spark.sql.catalyst.expressions.CurDateExpressionBuilder | curdate | SELECT curdate() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentCatalog | current_catalog | SELECT current_catalog() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_database | SELECT current_database() | struct | -| org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_schema | SELECT current_schema() | struct | +| org.apache.spark.sql.catalyst.expressions.CurrentDatabase | current_schema | SELECT current_schema() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentDate | current_date | SELECT current_date() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentTimeZone | current_timezone | SELECT current_timezone() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentTimestamp | current_timestamp | SELECT current_timestamp() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentUser | current_user | SELECT current_user() | struct | | org.apache.spark.sql.catalyst.expressions.CurrentUser | user | SELECT user() | struct | | org.apache.spark.sql.catalyst.expressions.DateAdd | date_add | SELECT date_add('2016-07-30', 1) | struct | -| org.apache.spark.sql.catalyst.expressions.DateAdd | dateadd | SELECT dateadd('2016-07-30', 1) | struct | +| org.apache.spark.sql.catalyst.expressions.DateAdd | dateadd | SELECT dateadd('2016-07-30', 1) | struct | | org.apache.spark.sql.catalyst.expressions.DateDiff | date_diff | SELECT date_diff('2009-07-31', '2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DateDiff | datediff | SELECT datediff('2009-07-31', '2009-07-30') | struct | | org.apache.spark.sql.catalyst.expressions.DateFormatClass | date_format | SELECT date_format('2016-04-08', 'y') | struct | @@ -131,8 +131,8 @@ | org.apache.spark.sql.catalyst.expressions.EqualTo | == | SELECT 2 == 2 | struct<(2 = 2):boolean> | | org.apache.spark.sql.catalyst.expressions.EulerNumber | e | SELECT e() | struct | | org.apache.spark.sql.catalyst.expressions.Exp | exp | SELECT exp(0) | struct | -| org.apache.spark.sql.catalyst.expressions.ExplodeExpressionBuilder | explode | SELECT explode(array(10, 20)) | struct | -| org.apache.spark.sql.catalyst.expressions.ExplodeExpressionBuilder | explode_outer | SELECT explode_outer(array(10, 20)) | struct | +| org.apache.spark.sql.catalyst.expressions.Explode | explode | SELECT explode(array(10, 20)) | struct | +| org.apache.spark.sql.catalyst.expressions.Explode | explode_outer | SELECT explode_outer(array(10, 20)) | struct | | org.apache.spark.sql.catalyst.expressions.Expm1 | expm1 | SELECT expm1(0) | struct | | org.apache.spark.sql.catalyst.expressions.Extract | extract | SELECT extract(YEAR FROM TIMESTAMP '2019-08-12 01:00:00.123456') | struct | | org.apache.spark.sql.catalyst.expressions.Factorial | factorial | SELECT factorial(5) | struct | @@ -212,7 +212,7 @@ | org.apache.spark.sql.catalyst.expressions.MapKeys | map_keys | SELECT map_keys(map(1, 'a', 2, 'b')) | struct> | | org.apache.spark.sql.catalyst.expressions.MapValues | map_values | SELECT map_values(map(1, 'a', 2, 'b')) | struct> | | org.apache.spark.sql.catalyst.expressions.MapZipWith | map_zip_with | SELECT map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2)) | struct> | -| org.apache.spark.sql.catalyst.expressions.MaskExpressionBuilder | mask | SELECT mask('abcd-EFGH-8765-4321') | struct | +| org.apache.spark.sql.catalyst.expressions.Mask | mask | SELECT mask('abcd-EFGH-8765-4321') | struct | | org.apache.spark.sql.catalyst.expressions.Md5 | md5 | SELECT md5('Spark') | struct | | org.apache.spark.sql.catalyst.expressions.MicrosToTimestamp | timestamp_micros | SELECT timestamp_micros(1230219000123123) | struct | | org.apache.spark.sql.catalyst.expressions.MillisToTimestamp | timestamp_millis | SELECT timestamp_millis(1230219000123) | struct | @@ -255,7 +255,7 @@ | org.apache.spark.sql.catalyst.expressions.RPadExpressionBuilder | rpad | SELECT rpad('hi', 5, '??') | struct | | org.apache.spark.sql.catalyst.expressions.RaiseError | raise_error | SELECT raise_error('custom error message') | struct | | org.apache.spark.sql.catalyst.expressions.Rand | rand | SELECT rand() | struct | -| org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct | +| org.apache.spark.sql.catalyst.expressions.Rand | random | SELECT random() | struct | | org.apache.spark.sql.catalyst.expressions.Randn | randn | SELECT randn() | struct | | org.apache.spark.sql.catalyst.expressions.Rank | rank | SELECT a, b, rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.RegExpCount | regexp_count | SELECT regexp_count('Steven Jones and Stephen Smith are the best players', 'Ste(v|ph)en') | struct | @@ -325,7 +325,7 @@ | org.apache.spark.sql.catalyst.expressions.TimeWindow | window | SELECT a, window.start, window.end, count(*) as cnt FROM VALUES ('A1', '2021-01-01 00:00:00'), ('A1', '2021-01-01 00:04:30'), ('A1', '2021-01-01 00:06:00'), ('A2', '2021-01-01 00:01:00') AS tab(a, b) GROUP by a, window(b, '5 minutes') ORDER BY a, start | struct | | org.apache.spark.sql.catalyst.expressions.ToBinary | to_binary | SELECT to_binary('abc', 'utf-8') | struct | | org.apache.spark.sql.catalyst.expressions.ToCharacter | to_char | SELECT to_char(454, '999') | struct | -| org.apache.spark.sql.catalyst.expressions.ToCharacter | to_varchar | SELECT to_varchar(454, '999') | struct | +| org.apache.spark.sql.catalyst.expressions.ToCharacter | to_varchar | SELECT to_varchar(454, '999') | struct | | org.apache.spark.sql.catalyst.expressions.ToDegrees | degrees | SELECT degrees(3.141592653589793) | struct | | org.apache.spark.sql.catalyst.expressions.ToNumber | to_number | SELECT to_number('454', '999') | struct | | org.apache.spark.sql.catalyst.expressions.ToRadians | radians | SELECT radians(180) | struct | @@ -379,13 +379,13 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | any | SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | bool_or | SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.BoolOr | some | SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col) | struct | -| org.apache.spark.sql.catalyst.expressions.aggregate.CollectList | array_agg | SELECT array_agg(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | +| org.apache.spark.sql.catalyst.expressions.aggregate.CollectList | array_agg | SELECT array_agg(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | | org.apache.spark.sql.catalyst.expressions.aggregate.CollectList | collect_list | SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | | org.apache.spark.sql.catalyst.expressions.aggregate.CollectSet | collect_set | SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col) | struct> | | org.apache.spark.sql.catalyst.expressions.aggregate.Corr | corr | SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.Count | count | SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.CountIf | count_if | SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col) | struct | -| org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAggExpressionBuilder | count_min_sketch | SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col) | struct | +| org.apache.spark.sql.catalyst.expressions.aggregate.CountMinSketchAgg | count_min_sketch | SELECT hex(count_min_sketch(col, 0.5d, 0.5d, 1)) FROM VALUES (1), (2), (1) AS tab(col) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.CovPopulation | covar_pop | SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.CovSample | covar_samp | SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2) | struct | | org.apache.spark.sql.catalyst.expressions.aggregate.First | first | SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col) | struct | diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out index ce75051630dfb..544d736b56b64 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/charvarchar.sql.out @@ -714,19 +714,19 @@ Project [chr(cast(167 as bigint)) AS chr(167)#x, chr(cast(247 as bigint)) AS chr -- !query SELECT to_varchar(78.12, '$99.99') -- !query analysis -Project [to_varchar(78.12, $99.99) AS to_varchar(78.12, $99.99)#x] +Project [to_char(78.12, $99.99) AS to_char(78.12, $99.99)#x] +- OneRowRelation -- !query SELECT to_varchar(111.11, '99.9') -- !query analysis -Project [to_varchar(111.11, 99.9) AS to_varchar(111.11, 99.9)#x] +Project [to_char(111.11, 99.9) AS to_char(111.11, 99.9)#x] +- OneRowRelation -- !query SELECT to_varchar(12454.8, '99,999.9S') -- !query analysis -Project [to_varchar(12454.8, 99,999.9S) AS to_varchar(12454.8, 99,999.9S)#x] +Project [to_char(12454.8, 99,999.9S) AS to_char(12454.8, 99,999.9S)#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/current_database_catalog.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/current_database_catalog.sql.out index 2759f5e67507b..ad72e19b6bb7f 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/current_database_catalog.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/current_database_catalog.sql.out @@ -2,5 +2,5 @@ -- !query select current_database(), current_schema(), current_catalog() -- !query analysis -Project [current_database() AS current_database()#x, current_schema() AS current_schema()#x, current_catalog() AS current_catalog()#x] +Project [current_database() AS current_database()#x, current_database() AS current_database()#x, current_catalog() AS current_catalog()#x] +- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out index d7c7aad31a880..93c463575dc1a 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/group-by.sql.out @@ -1133,7 +1133,7 @@ SELECT FROM VALUES (1), (2), (1) AS tab(col) -- !query analysis -Aggregate [collect_list(col#x, 0, 0) AS collect_list(col)#x, array_agg(col#x, 0, 0) AS array_agg(col)#x] +Aggregate [collect_list(col#x, 0, 0) AS collect_list(col)#x, collect_list(col#x, 0, 0) AS collect_list(col)#x] +- SubqueryAlias tab +- LocalRelation [col#x] @@ -1147,7 +1147,7 @@ FROM VALUES (1,4),(2,3),(1,4),(2,4) AS v(a,b) GROUP BY a -- !query analysis -Aggregate [a#x], [a#x, collect_list(b#x, 0, 0) AS collect_list(b)#x, array_agg(b#x, 0, 0) AS array_agg(b)#x] +Aggregate [a#x], [a#x, collect_list(b#x, 0, 0) AS collect_list(b)#x, collect_list(b#x, 0, 0) AS collect_list(b)#x] +- SubqueryAlias v +- LocalRelation [a#x, b#x] diff --git a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out index a9604fc375acc..dd8bdc698ea7f 100644 --- a/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/charvarchar.sql.out @@ -1222,7 +1222,7 @@ struct -- !query SELECT to_varchar(78.12, '$99.99') -- !query schema -struct +struct -- !query output $78.12 @@ -1230,7 +1230,7 @@ $78.12 -- !query SELECT to_varchar(111.11, '99.9') -- !query schema -struct +struct -- !query output ##.# @@ -1238,6 +1238,6 @@ struct -- !query SELECT to_varchar(12454.8, '99,999.9S') -- !query schema -struct +struct -- !query output 12,454.8+ diff --git a/sql/core/src/test/resources/sql-tests/results/current_database_catalog.sql.out b/sql/core/src/test/resources/sql-tests/results/current_database_catalog.sql.out index 7fbe2dfff4db1..379bf01e64571 100644 --- a/sql/core/src/test/resources/sql-tests/results/current_database_catalog.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/current_database_catalog.sql.out @@ -2,6 +2,6 @@ -- !query select current_database(), current_schema(), current_catalog() -- !query schema -struct +struct -- !query output default default spark_catalog diff --git a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out index 44fbfd7ad4952..548917ef79b2d 100644 --- a/sql/core/src/test/resources/sql-tests/results/group-by.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/group-by.sql.out @@ -1066,7 +1066,7 @@ SELECT FROM VALUES (1), (2), (1) AS tab(col) -- !query schema -struct,array_agg(col):array> +struct,collect_list(col):array> -- !query output [1,2,1] [1,2,1] @@ -1080,7 +1080,7 @@ FROM VALUES (1,4),(2,3),(1,4),(2,4) AS v(a,b) GROUP BY a -- !query schema -struct,array_agg(b):array> +struct,collect_list(b):array> -- !query output 1 [4,4] [4,4] 2 [3,4] [3,4] diff --git a/sql/core/src/test/resources/sql-tests/results/subexp-elimination.sql.out b/sql/core/src/test/resources/sql-tests/results/subexp-elimination.sql.out index 28457c0579e95..0f7ff3f107567 100644 --- a/sql/core/src/test/resources/sql-tests/results/subexp-elimination.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subexp-elimination.sql.out @@ -72,7 +72,7 @@ NULL -- !query SELECT from_json(a, 'struct').a + random() > 2, from_json(a, 'struct').b, from_json(b, 'array>')[0].a, from_json(b, 'array>')[0].b + + random() > 2 FROM testData -- !query schema -struct<((from_json(a).a + random()) > 2):boolean,from_json(a).b:string,from_json(b)[0].a:int,((from_json(b)[0].b + (+ random())) > 2):boolean> +struct<((from_json(a).a + rand()) > 2):boolean,from_json(a).b:string,from_json(b)[0].a:int,((from_json(b)[0].b + (+ rand())) > 2):boolean> -- !query output NULL NULL 1 true false 2 1 true @@ -84,7 +84,7 @@ true 6 6 true -- !query SELECT if(from_json(a, 'struct').a + random() > 5, from_json(b, 'array>')[0].a, from_json(b, 'array>')[0].a + 1) FROM testData -- !query schema -struct<(IF(((from_json(a).a + random()) > 5), from_json(b)[0].a, (from_json(b)[0].a + 1))):int> +struct<(IF(((from_json(a).a + rand()) > 5), from_json(b)[0].a, (from_json(b)[0].a + 1))):int> -- !query output 2 2 @@ -96,7 +96,7 @@ NULL -- !query SELECT case when from_json(a, 'struct').a > 5 then from_json(a, 'struct').b + random() > 5 when from_json(a, 'struct').a > 4 then from_json(a, 'struct').b + 1 + random() > 2 else from_json(a, 'struct').b + 2 + random() > 5 end FROM testData -- !query schema -struct 5) THEN ((from_json(a).b + random()) > 5) WHEN (from_json(a).a > 4) THEN (((from_json(a).b + 1) + random()) > 2) ELSE (((from_json(a).b + 2) + random()) > 5) END:boolean> +struct 5) THEN ((from_json(a).b + rand()) > 5) WHEN (from_json(a).a > 4) THEN (((from_json(a).b + 1) + rand()) > 2) ELSE (((from_json(a).b + 2) + rand()) > 5) END:boolean> -- !query output NULL false