svn commit: r54854 - /dev/spark/v3.3.0-rc5-bin/
Author: maxgekk Date: Sat Jun 4 09:15:39 2022 New Revision: 54854 Log: Apache Spark v3.3.0-rc5 Added: dev/spark/v3.3.0-rc5-bin/ dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc5-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc5-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc5-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.asc Sat Jun 4 09:15:39 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKbIlYTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q02YyD/wPYvJ8VvweSAAYMFrbIU66mZdfqcS3 +IXY1KjmwzirOnWPN9ovrcwLjHKFkWtsOLVDum4x4ffQoZUBKn+4xz/FGQx5j70Uw +pJ+GXXWQSBrhIs/CApCDz98Fx4KX+u6d/0qr5fpftOPJIQn/D9nwPOlF3NBaIhOv +jYqvZKEXqVgkZ/TSVnFHP4BRUYW7norV3F6s229KVvdvHE1wlUt3TnMk6ouSEmgM +a4AZXxryhD0BtkB+9+8WmVWjYQjX8NlPp4wB7fi2p4RfSLKDiDWP+ompSqwh65GD +k1hc32cgEQIAeVrh5O1ssT65PPOjNacVKzjRojujo1esKFyzaBdl6Ew04aCtsliF +mEO9XI1Jh+NijDnhxcRA8gck+hI/AqDf9aXUcSWyQm/BclygtHs/UXPYWPhgi1jm +4pGPBtsYOESCU0PewomFbwU36nMX/roLPJRGxk3m1ItxxU9FfPEYpRCaBa4KQxzB +g4gtAlYs3CokukoskZMl6nF22CnCaB/1PzaFExp7Tys9UgX6Pv5Vf0gwc94wRvKK +9RWtbAsvL6Cqd5pwu8cmauS3++BIGq6r09bzLvL5hG2fQwZ4jrIZqyFBSoxghSTb +uulNdjthA+c82How6/ACzTrTrtTh8LammrIuX95E545fMdDGBO2DYfjrK1g2BprC +fKF4UyooOJUriA== +=YOtt +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.sha512 Sat Jun 4 09:15:39 2022 @@ -0,0 +1 @@ +1b9fb801c955e1038122804678defdd2eaba0f26dd501e09a2f61e13c77b9292bf14ca9c25a8561ce23ff4ee50ebad6d047a34696394f520f059f8e32dc91a9a SparkR_3.3.0.tar.gz Added: dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz.asc Sat Jun 4 09:15:39 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKbIlkTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q01pkEACwe03A1jrjWnAN6evlwk0xxMugbZI+ +2xNUuHOAPNc6Z1rsYuZnh8WCHKVo/Ik0JEdpDAPQDGqC1Pwn4l4LFf9c6BiTTCRS +14VsiJrERpzzBNT8lqVIT09Z2esLFjTiw7S/tXFwkSNPT6o+IZb3KxuTm6XREc1Q +QmsbC/EfOmqxSlTdBf3Dq7T2RSSNyFHOLwdgtPUWNxSXhKGzQd6WYceUx2aCGkrv +u/TGoPhQL+F15EmhrK5Pfrycvo4UbJrsWzBswUeQFbJ3klyQlPvOfdm/VZhWzG/a +XGggZmTFiPEdFRJ9FRnArK9lng/8uUME/2Am9WTU28dkFRiaND/CARJ9NvYKyYIR +TBOudzm+advHgOjiHS1FWLXG9sHdGvgjwFe/g3byzPqiCl2LmPencXCgH0lmRd/x +H7HFp4nRQtWIVByedwSeFGJS4zZh42fWg4h7K6iP8dP4ZoepcuPGZw6qIi0P+tFh +ATTimLDx28LhsiaRE7QP2xvYXI0yCIjeDLPGgbM9rpUapqwUMTcuDUtnFSKzV7QW +Ly+jJpyBL6lSAy7N7e4mpCm8yEep/sdPCL/H7XF9cHCEV5Afnh/vqG63jXKxZYgz +vRTW5oDMCn/mpxt8NxQXtiu7iXNJvAIPPJZWclCSoTBfkueQhyRCypYXF5//O6l8 +YZ4yF8LA+z0gDA== +=2iWV +-END PGP SIGNATURE- Added: dev/spark
[spark] 01/01: Preparing development version 3.3.1-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit bf3c472ff87ab7ec17f55e4730d6c6c9a7f299ad Author: Maxim Gekk AuthorDate: Sat Jun 4 06:43:12 2022 + Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87..0e449e841cf 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b..d12f2ad73fa 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e..842d63f5d38 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d75..f7d187bf952 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e7747..53f38df8851 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e..845f6659407 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370a..8e159089193 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3..1987c133285 100644 --- a
[spark] branch branch-3.3 updated (b7e95bad882 -> bf3c472ff87)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from b7e95bad882 [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries add 7cf29705272 Preparing Spark release v3.3.0-rc5 new bf3c472ff87 Preparing development version 3.3.1-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] tag v3.3.0-rc5 created (now 7cf29705272)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to tag v3.3.0-rc5 in repository https://gitbox.apache.org/repos/asf/spark.git at 7cf29705272 (commit) This tag includes the following new commits: new 7cf29705272 Preparing Spark release v3.3.0-rc5 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing Spark release v3.3.0-rc5
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to tag v3.3.0-rc5 in repository https://gitbox.apache.org/repos/asf/spark.git commit 7cf29705272ab8e8c70e8885a3664ad8ae3cd5e9 Author: Maxim Gekk AuthorDate: Sat Jun 4 06:43:05 2022 + Preparing Spark release v3.3.0-rc5 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf..9479bb3bf87 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fa..2e9c4d9960b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d38..2a9acfa335e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf952..7b17e625d75 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df8851..c5c920e7747 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407..697b5a3928e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e159089193..ad2db11370a 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c133285..1a7bdee70f3 100644 --- a/common/tags
[spark] branch branch-3.3 updated: [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new b7e95bad882 [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries b7e95bad882 is described below commit b7e95bad882482168b7dd301fcfa3daf80477a7a Author: Josh Rosen AuthorDate: Sat Jun 4 09:12:42 2022 +0300 [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries ### What changes were proposed in this pull request? This is a followup to #36654. That PR modified the existing `QueryPlan.transformDownWithSubqueries` to add additional arguments for tree pattern pruning. In this PR, I roll back the change to that method's signature and instead add a new `transformDownWithSubqueriesAndPruning` method. ### Why are the changes needed? The original change breaks binary and source compatibility in Catalyst. Technically speaking, Catalyst APIs are considered internal to Spark and are subject to change between minor releases (see [source](https://github.com/apache/spark/blob/bb51add5c79558df863d37965603387d40cc4387/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/package.scala#L20-L24)), but I think it's nice to try to avoid API breakage when possible. While trying to compile some custom Catalyst code, I ran into issues when trying to call the `transformDownWithSubqueries` method without supplying a tree pattern filter condition. If I do `transformDownWithSubqueries() { f} ` then I get a compilation error. I think this is due to the first parameter group containing all default parameters. My PR's solution of adding a new `transformDownWithSubqueriesAndPruning` method solves this problem. It's also more consistent with the naming convention used for other pruning-enabled tree transformation methods. 
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #36765 from JoshRosen/SPARK-39259-binary-compatibility-followup. Authored-by: Josh Rosen Signed-off-by: Max Gekk (cherry picked from commit eda6c4b9987f0515cb0aae4686c8a0ae0a3987d4) Signed-off-by: Max Gekk --- .../sql/catalyst/optimizer/finishAnalysis.scala| 2 +- .../spark/sql/catalyst/plans/QueryPlan.scala | 22 -- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 242c799dd22..a33069051d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -84,7 +84,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { treePatternbits.containsPattern(CURRENT_LIKE) } -plan.transformDownWithSubqueries(transformCondition) { +plan.transformDownWithSubqueriesAndPruning(transformCondition) { case subQuery => subQuery.transformAllExpressionsWithPruning(transformCondition) { case cd: CurrentDate => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index d0283f4d367..cc62c81b101 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -454,7 +454,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * to rewrite the whole plan, include its subqueries, in one go. 
*/ def transformWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = -transformDownWithSubqueries(AlwaysProcess.fn, UnknownRuleId)(f) +transformDownWithSubqueries(f) /** * Returns a copy of this node where the given partial function has been recursively applied @@ -479,10 +479,20 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * first to this node, then this node's subqueries and finally this node's children. * When the partial function does not apply to a given node, it is left unchanged. */ - def transformDownWithSubqueries( -cond: TreePatternBits => Boolean = AlwaysProcess.fn, ruleId: RuleId = UnknownRuleId) -(f: PartialFunction[PlanType, PlanType]) -: PlanType = { + def transformDownWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = { +transformDownWithSubqueriesAndPruning(AlwaysProcess.fn, UnknownRuleId)(f) + } + + /** + * This meth
[spark] branch master updated: [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new eda6c4b9987 [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries eda6c4b9987 is described below commit eda6c4b9987f0515cb0aae4686c8a0ae0a3987d4 Author: Josh Rosen AuthorDate: Sat Jun 4 09:12:42 2022 +0300 [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries ### What changes were proposed in this pull request? This is a followup to #36654. That PR modified the existing `QueryPlan.transformDownWithSubqueries` to add additional arguments for tree pattern pruning. In this PR, I roll back the change to that method's signature and instead add a new `transformDownWithSubqueriesAndPruning` method. ### Why are the changes needed? The original change breaks binary and source compatibility in Catalyst. Technically speaking, Catalyst APIs are considered internal to Spark and are subject to change between minor releases (see [source](https://github.com/apache/spark/blob/bb51add5c79558df863d37965603387d40cc4387/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/package.scala#L20-L24)), but I think it's nice to try to avoid API breakage when possible. While trying to compile some custom Catalyst code, I ran into issues when trying to call the `transformDownWithSubqueries` method without supplying a tree pattern filter condition. If I do `transformDownWithSubqueries() { f} ` then I get a compilation error. I think this is due to the first parameter group containing all default parameters. My PR's solution of adding a new `transformDownWithSubqueriesAndPruning` method solves this problem. It's also more consistent with the naming convention used for other pruning-enabled tree transformation methods. 
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #36765 from JoshRosen/SPARK-39259-binary-compatibility-followup. Authored-by: Josh Rosen Signed-off-by: Max Gekk --- .../sql/catalyst/optimizer/finishAnalysis.scala| 2 +- .../spark/sql/catalyst/plans/QueryPlan.scala | 22 -- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 242c799dd22..a33069051d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -84,7 +84,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { treePatternbits.containsPattern(CURRENT_LIKE) } -plan.transformDownWithSubqueries(transformCondition) { +plan.transformDownWithSubqueriesAndPruning(transformCondition) { case subQuery => subQuery.transformAllExpressionsWithPruning(transformCondition) { case cd: CurrentDate => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index d0283f4d367..cc62c81b101 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -454,7 +454,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * to rewrite the whole plan, include its subqueries, in one go. 
*/ def transformWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = -transformDownWithSubqueries(AlwaysProcess.fn, UnknownRuleId)(f) +transformDownWithSubqueries(f) /** * Returns a copy of this node where the given partial function has been recursively applied @@ -479,10 +479,20 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * first to this node, then this node's subqueries and finally this node's children. * When the partial function does not apply to a given node, it is left unchanged. */ - def transformDownWithSubqueries( -cond: TreePatternBits => Boolean = AlwaysProcess.fn, ruleId: RuleId = UnknownRuleId) -(f: PartialFunction[PlanType, PlanType]) -: PlanType = { + def transformDownWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = { +transformDownWithSubqueriesAndPruning(AlwaysProcess.fn, UnknownRuleId)(f) + } + + /** + * This method is the top-down (pre-order) counterpart of transformUpWithSubqueries. + * Returns a copy of this n
svn commit: r54845 - in /dev/spark/v3.3.0-rc4-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.0/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: maxgekk Date: Fri Jun 3 12:28:47 2022 New Revision: 54845 Log: Apache Spark v3.3.0-rc4 docs [This commit notification would consist of 2665 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54843 - /dev/spark/v3.3.0-rc4-bin/
Author: maxgekk Date: Fri Jun 3 11:54:40 2022 New Revision: 54843 Log: Apache Spark v3.3.0-rc4 Added: dev/spark/v3.3.0-rc4-bin/ dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc4-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc4-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc4-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.asc Fri Jun 3 11:54:40 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKZ9hwTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q02g6EADly9nJXABQs9frXWjgUexvm5TY6+lY +mbUg3K+faPfljt1NKRjqzkue5ePMm6zm2x2Sj33Rco9iIGQk8H3BKc+6IIOreknJ +bgGBmZ/ffo7NM2RlReVTKUuVllrFtmXECznG+o4K2w8HrOr498KtXQ2eE33XKG2h +SzDhMyn6VIIal2FDwc63Edyh2CV89wQpHOFhrhMQbhBziV/IQ5d4ggrbMB+WOVQi +IK5l0PqUEB+8LYODMC2F5OVt8p0VRr8OOv5YzA6/3Dca5hKHElbDqDgU0KVFQR2d +03CHh3DmQP7QDfsGN4z+w/VbXu9oBLPeCd4N8mxIRwReqJUuGYrkpgOa1X+5wPKN +NfR4LBnde7MiBWaonKl/UtvyuYqjA1bxIi/Ff0juhzpWkffLz/dB434HqJe2wArA +B/wjzcYKkcMt+402si0/B00rjGS2bC8tuTnQbppr1Ln+7i9qDrX0WBzaqSeHAR2l +J9dwPrGf0w0XPni0fqM3+tZyIkIxWCjhBT4OgBYX/yT3EyBj3KRTjVkpJ3In/fpe +YD90gZGKR8/YdU0cbnKA6oV9vC3aH8fXUC8gM74cot9OLvczBTYG1GwLVh86e7VG +qMBcNSxJabiK0uEI2mt09eXrAINxAlw+1vi2NM0ZuAZ0j5pi/SZu23QIiSu8FiIt +AaoHVlpVgkCL+g== +=tqAA +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.sha512 Fri Jun 3 11:54:40 2022 @@ -0,0 +1 @@ +c53dcb750d9c7ace040b9c6a11661aaea3bdd0500b0da688521fb6a0989ad95dba82655b2c523fbcb6ded11f9c2c81542263fff4d7e28f1e06e7e697c0299bc4 SparkR_3.3.0.tar.gz Added: dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz.asc Fri Jun 3 11:54:40 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKZ9h4THG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q02biEACsBNascO1EuZR/a4+rjpSP5RVUC6KD +8GL8oXtB6KKMy4wUlYPj3xODg3AI7L/9+OQ3lAUpSTgUBr3RvzKEgyhxUYSyTdx4 +CIv7r1ft1NDgYA59sreFu2YuKMY6CsyP9Ze6KSHG2zWxAps9VPN/Ar9dzGUFFC22 +0MdZVXmnl3Ea2KXrxCPINH6p1xANbmQA+G3gLX73oT3z1jCzwbSxubWhj6Yw55YQ +sMIvWT/4IIkYldEDaGVmZWCAQ/UyCXiLRraymmG2DQVhAeoHxGo5jxdggnRLlSqW +0J5PWmtNUHjj9g9pFjbm76x4BJLUGuLptnumvbkqYgh5X6h+OKBWMw5ceIpMR2/f +vPRGa9y1Bk0WluNeN3IIsMe7UuFoJBIuCeOi8UmTbVGoV+naY5psSMtJPylQ8mJR +c8nY8gXCWeMCWxokNQQIWxXZpRMwWlojoV2AmRUR+nYG+roebyhI3H4rU6SiVXlP +vae+kIjPQCILPqEwRlCa+vfqj9ukfE0AmusnGhN3/Mc0qOTtkOqRVd2+KHpF+i4C +JnXqqJhtg4KUCsLqey3gUJsjXgTAHIXxISYWzPWQYBrKBnXBA0/GP1+cow9vTeuB +TzmirWfaVBv4DkSoWzQ0q8ils3aKsiML07VSyhcVCTWQcoLJ+WR8z3kV+a0vYr5j +oY4OgV1u6UmElA== +=n+my +-END PGP SIGNATURE- Added: dev/spark
[spark] 01/01: Preparing development version 3.3.1-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 03012f432ac24049291c71415a32677f612a7afd Author: Maxim Gekk AuthorDate: Fri Jun 3 09:20:38 2022 + Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87..0e449e841cf 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b..d12f2ad73fa 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e..842d63f5d38 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d75..f7d187bf952 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e7747..53f38df8851 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e..845f6659407 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370a..8e159089193 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3..1987c133285 100644 --- a
[spark] branch branch-3.3 updated (61d22b6f313 -> 03012f432ac)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 61d22b6f313 [SPARK-39371][DOCS][CORE] Review and fix issues in Scala/Java API docs of Core module add 4e3599bc11a Preparing Spark release v3.3.0-rc4 new 03012f432ac Preparing development version 3.3.1-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] tag v3.3.0-rc4 created (now 4e3599bc11a)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to tag v3.3.0-rc4 in repository https://gitbox.apache.org/repos/asf/spark.git at 4e3599bc11a (commit) This tag includes the following new commits: new 4e3599bc11a Preparing Spark release v3.3.0-rc4 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing Spark release v3.3.0-rc4
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to tag v3.3.0-rc4 in repository https://gitbox.apache.org/repos/asf/spark.git commit 4e3599bc11a1cb0ea9fc819e7f752d2228e54baf Author: Maxim Gekk AuthorDate: Fri Jun 3 09:20:31 2022 + Preparing Spark release v3.3.0-rc4 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf..9479bb3bf87 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fa..2e9c4d9960b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d38..2a9acfa335e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf952..7b17e625d75 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df8851..c5c920e7747 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407..697b5a3928e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e159089193..ad2db11370a 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c133285..1a7bdee70f3 100644 --- a/common/tags
[spark] branch master updated (9e6f2dd7268 -> 873ad5596b5)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 9e6f2dd7268 [SPARK-39320][SQL] Support aggregate function `MEDIAN` add 873ad5596b5 [SPARK-37623][SQL] Support ANSI Aggregate Function: regr_intercept No new revisions were added by this update. Summary of changes: .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/aggregate/Covariance.scala | 4 +- .../expressions/aggregate/linearRegression.scala | 57 +- .../aggregate/AggregateExpressionSuite.scala | 17 +++ .../sql-functions/sql-expression-schema.md | 1 + .../sql-tests/inputs/linear-regression.sql | 6 +++ .../inputs/postgreSQL/aggregates_part1.sql | 2 +- .../inputs/udf/postgreSQL/udf-aggregates_part1.sql | 2 +- .../sql-tests/results/linear-regression.sql.out| 35 - .../results/postgreSQL/aggregates_part1.sql.out| 10 +++- .../udf/postgreSQL/udf-aggregates_part1.sql.out| 10 +++- 11 files changed, 136 insertions(+), 9 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-39259][SQL][3.3] Evaluate timestamps consistently in subqueries
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 4a0f0ff6c22 [SPARK-39259][SQL][3.3] Evaluate timestamps consistently in subqueries 4a0f0ff6c22 is described below commit 4a0f0ff6c22b85cb0fc1eef842da8dbe4c90543a Author: Ole Sasse AuthorDate: Fri Jun 3 09:12:26 2022 +0300 [SPARK-39259][SQL][3.3] Evaluate timestamps consistently in subqueries ### What changes were proposed in this pull request? Apply the optimizer rule ComputeCurrentTime consistently across subqueries. This is a backport of https://github.com/apache/spark/pull/36654. ### Why are the changes needed? At the moment timestamp functions like now() can return different values within a query if subqueries are involved ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A new unit test was added Closes #36752 from olaky/SPARK-39259-spark_3_3. 
Authored-by: Ole Sasse Signed-off-by: Max Gekk --- .../sql/catalyst/optimizer/finishAnalysis.scala| 41 +- .../spark/sql/catalyst/plans/QueryPlan.scala | 11 ++- .../optimizer/ComputeCurrentTimeSuite.scala| 89 -- 3 files changed, 95 insertions(+), 46 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index ef9c4b9af40..242c799dd22 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -17,14 +17,16 @@ package org.apache.spark.sql.catalyst.optimizer -import scala.collection.mutable +import java.time.{Instant, LocalDateTime} import org.apache.spark.sql.catalyst.CurrentUserContext.CURRENT_USER import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.trees.TreePattern._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ} +import org.apache.spark.sql.catalyst.trees.TreePatternBits +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, instantToMicros, localDateTimeToMicros} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -73,29 +75,30 @@ object RewriteNonCorrelatedExists extends Rule[LogicalPlan] { */ object ComputeCurrentTime extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { -val currentDates = mutable.Map.empty[String, Literal] -val timeExpr = CurrentTimestamp() -val timestamp = timeExpr.eval(EmptyRow).asInstanceOf[Long] -val 
currentTime = Literal.create(timestamp, timeExpr.dataType) +val instant = Instant.now() +val currentTimestampMicros = instantToMicros(instant) +val currentTime = Literal.create(currentTimestampMicros, TimestampType) val timezone = Literal.create(conf.sessionLocalTimeZone, StringType) -val localTimestamps = mutable.Map.empty[String, Literal] -plan.transformAllExpressionsWithPruning(_.containsPattern(CURRENT_LIKE)) { - case currentDate @ CurrentDate(Some(timeZoneId)) => -currentDates.getOrElseUpdate(timeZoneId, { - Literal.create(currentDate.eval().asInstanceOf[Int], DateType) -}) - case CurrentTimestamp() | Now() => currentTime - case CurrentTimeZone() => timezone - case localTimestamp @ LocalTimestamp(Some(timeZoneId)) => -localTimestamps.getOrElseUpdate(timeZoneId, { - Literal.create(localTimestamp.eval().asInstanceOf[Long], TimestampNTZType) -}) +def transformCondition(treePatternbits: TreePatternBits): Boolean = { + treePatternbits.containsPattern(CURRENT_LIKE) +} + +plan.transformDownWithSubqueries(transformCondition) { + case subQuery => +subQuery.transformAllExpressionsWithPruning(transformCondition) { + case cd: CurrentDate => +Literal.create(DateTimeUtils.microsToDays(currentTimestampMicros, cd.zoneId), DateType) + case CurrentTimestamp() | Now() => currentTime + case CurrentTimeZone() => timezone + case localTimestamp: LocalTimestamp => +val asDateTime = LocalDateTime.ofInstant(instant, localTimestamp.zoneId) +Literal.create(localDateTimeToMicros(as
[spark] branch master updated: [SPARK-39320][SQL] Support aggregate function `MEDIAN`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 9e6f2dd7268 [SPARK-39320][SQL] Support aggregate function `MEDIAN` 9e6f2dd7268 is described below commit 9e6f2dd72686a9ac44fd4573b5a408f8a8e55fe1 Author: Jiaan Geng AuthorDate: Fri Jun 3 08:23:22 2022 +0300 [SPARK-39320][SQL] Support aggregate function `MEDIAN` ### What changes were proposed in this pull request? Many mainstream database supports aggregate function `MEDIAN`. **Syntax:** Aggregate function `MEDIAN( )` Window function `MEDIAN( ) OVER ( [ PARTITION BY ] )` **Arguments:** expr: The expression must evaluate to a numeric data type (INTEGER, FLOAT, DECIMAL, or equivalent). **Examples**: ``` select k, median(v) from aggr group by k order by k; +---+---+ | K | MEDIAN(V) | |---+---| | 1 | 20.0 | | 2 | 22.5 | | 3 | NULL | +---+---+ ``` ### Why are the changes needed? 
The mainstream database supports `MEDIAN` show below: **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/median.html **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/MEDIAN.html#GUID-DE15705A-AC18-4416-8487-B9E1D70CE01A **ClickHouse** https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/median **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/r_MEDIAN.html **Teradata** https://docs.teradata.com/r/Teradata-VantageTM-SQL-Functions-Expressions-and-Predicates/March-2019/Ordered-Analytical/Window-Aggregate-Functions/MEDIAN **DB2** https://www.ibm.com/docs/en/db2/11.5?topic=functions-median **Vertica** https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/Analytic/MEDIANAnalytic.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CAnalytic%20Functions%7C_20 **H2** http://www.h2database.com/html/functions-aggregate.html#median **Sybase** https://infocenter.sybase.com/help/index.jsp?topic=/com.sybase.infocenter.dc01776.1601/doc/html/san1278453109663.html **Exasol** https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/median.htm **Yellowbrick** https://www.yellowbrick.com/docs/5.2/ybd_sqlref/median.html **Mariadb** https://mariadb.com/kb/en/median/ **Singlestore** https://docs.singlestore.com/db/v7.6/en/reference/sql-reference/aggregate-functions/median.html **InfluxDB** https://docs.influxdata.com/flux/v0.x/stdlib/universe/median/ ### Does this PR introduce _any_ user-facing change? 'No'. New feature. ### How was this patch tested? New tests. Closes #36714 from beliefer/SPARK-39320. 
Authored-by: Jiaan Geng Signed-off-by: Max Gekk --- .../sql/catalyst/analysis/CheckAnalysis.scala | 5 +- .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/aggregate/percentiles.scala| 26 +- .../sql-functions/sql-expression-schema.md | 1 + .../test/resources/sql-tests/inputs/group-by.sql | 29 -- .../resources/sql-tests/inputs/percentiles.sql | 212 +++ .../src/test/resources/sql-tests/inputs/window.sql | 112 -- .../resources/sql-tests/results/group-by.sql.out | 70 +--- .../sql-tests/results/percentiles.sql.out | 417 + .../resources/sql-tests/results/window.sql.out | 229 +-- 10 files changed, 661 insertions(+), 441 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index ed2e9ba2b6b..7635918279a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, PercentileCont, PercentileDisc} +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Median, PercentileCont, PercentileDisc} import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, DecorrelateInnerQuery, InlineCTE} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -243,7 +243,8 @@ trait CheckAnalysis extends PredicateH
[spark] branch branch-3.3 updated (4da8f3a76b1 -> bc4aab5c26d)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 4da8f3a76b1 [SPARK-39361] Don't use Log4J2's extended throwable conversion pattern in default logging configurations add bc4aab5c26d [SPARK-39295][DOCS][PYTHON][3.3] Improve documentation of pandas API supported list No new revisions were added by this update. Summary of changes: .../pandas_on_spark/supported_pandas_api.rst | 62 +++--- 1 file changed, 30 insertions(+), 32 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39259][SQL] Evaluate timestamps consistently in subqueries
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 52e2717c2d1 [SPARK-39259][SQL] Evaluate timestamps consistently in subqueries 52e2717c2d1 is described below commit 52e2717c2d1b6e1f449de5714b6e202074bac26f Author: Ole Sasse AuthorDate: Thu Jun 2 21:42:10 2022 +0300 [SPARK-39259][SQL] Evaluate timestamps consistently in subqueries ### What changes were proposed in this pull request? Apply the optimizer rule ComputeCurrentTime consistently across subqueries ### Why are the changes needed? At the moment timestamp functions like now() can return different values within a query if subqueries are involved ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A new unit test was added Closes #36654 from olaky/SPARK-39259. Authored-by: Ole Sasse Signed-off-by: Max Gekk --- .../sql/catalyst/optimizer/finishAnalysis.scala| 41 +- .../spark/sql/catalyst/plans/QueryPlan.scala | 11 ++- .../optimizer/ComputeCurrentTimeSuite.scala| 89 -- 3 files changed, 95 insertions(+), 46 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index ef9c4b9af40..242c799dd22 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -17,14 +17,16 @@ package org.apache.spark.sql.catalyst.optimizer -import scala.collection.mutable +import java.time.{Instant, LocalDateTime} import org.apache.spark.sql.catalyst.CurrentUserContext.CURRENT_USER import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ 
import org.apache.spark.sql.catalyst.trees.TreePattern._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ} +import org.apache.spark.sql.catalyst.trees.TreePatternBits +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, instantToMicros, localDateTimeToMicros} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -73,29 +75,30 @@ object RewriteNonCorrelatedExists extends Rule[LogicalPlan] { */ object ComputeCurrentTime extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { -val currentDates = mutable.Map.empty[String, Literal] -val timeExpr = CurrentTimestamp() -val timestamp = timeExpr.eval(EmptyRow).asInstanceOf[Long] -val currentTime = Literal.create(timestamp, timeExpr.dataType) +val instant = Instant.now() +val currentTimestampMicros = instantToMicros(instant) +val currentTime = Literal.create(currentTimestampMicros, TimestampType) val timezone = Literal.create(conf.sessionLocalTimeZone, StringType) -val localTimestamps = mutable.Map.empty[String, Literal] -plan.transformAllExpressionsWithPruning(_.containsPattern(CURRENT_LIKE)) { - case currentDate @ CurrentDate(Some(timeZoneId)) => -currentDates.getOrElseUpdate(timeZoneId, { - Literal.create(currentDate.eval().asInstanceOf[Int], DateType) -}) - case CurrentTimestamp() | Now() => currentTime - case CurrentTimeZone() => timezone - case localTimestamp @ LocalTimestamp(Some(timeZoneId)) => -localTimestamps.getOrElseUpdate(timeZoneId, { - Literal.create(localTimestamp.eval().asInstanceOf[Long], TimestampNTZType) -}) +def transformCondition(treePatternbits: TreePatternBits): Boolean = { + treePatternbits.containsPattern(CURRENT_LIKE) +} + +plan.transformDownWithSubqueries(transformCondition) { + case 
subQuery => +subQuery.transformAllExpressionsWithPruning(transformCondition) { + case cd: CurrentDate => +Literal.create(DateTimeUtils.microsToDays(currentTimestampMicros, cd.zoneId), DateType) + case CurrentTimestamp() | Now() => currentTime + case CurrentTimeZone() => timezone + case localTimestamp: LocalTimestamp => +val asDateTime = LocalDateTime.ofInstant(instant, localTimestamp.zoneId) +Literal.create(localDateTimeToMicros(asDateTime), TimestampNTZType) +} } } } - /** * Replaces the expression of CurrentDatabase with the current data
[spark] branch branch-3.3 updated: [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new ef521d30a3b [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time ef521d30a3b is described below commit ef521d30a3b023213bbc3076911a93c0c0c425dc Author: yangjie01 AuthorDate: Thu Jun 2 13:06:14 2022 +0300 [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time ### What changes were proposed in this pull request? After SPARK-38118, `dataTypeMismatchError` related to `Filter` will be checked and throw in `RemoveTempResolvedColumn`, this will cause compatibility issue with exception message presentation. For example, the following case: ``` spark.sql("create table t1(user_id int, auct_end_dt date) using parquet;") spark.sql("select * from t1 join t2 on t1.user_id = t2.user_id where t1.auct_end_dt >= Date_sub('2020-12-27', 90)").show ``` The expected message is ``` Table or view not found: t2 ``` But the actual message is ``` org.apache.spark.sql.AnalysisException: cannot resolve 'date_sub('2020-12-27', 90)' due to data type mismatch: argument 1 requires date type, however, ''2020-12-27'' is of string type.; line 1 pos 76 ``` For forward compatibility, this pr change to only records `DATA_TYPE_MISMATCH_ERROR_MESSAGE` in the `RemoveTempResolvedColumn` check process , and move `failAnalysis` to `CheckAnalysis#checkAnalysis` ### Why are the changes needed? Fix analysis exception message compatibility. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions and add a new test case Closes #36746 from LuciferYang/SPARK-39354. 
Authored-by: yangjie01 Signed-off-by: Max Gekk (cherry picked from commit 89fdb8a6fb6a669c458891b3abeba236e64b1e89) Signed-off-by: Max Gekk --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 ++- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 17 - .../spark/sql/catalyst/analysis/AnalysisSuite.scala | 16 ++-- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ba492e58f6e..51c1d1f768f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -28,7 +28,7 @@ import scala.util.{Failure, Random, Success, Try} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR} +import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.DATA_TYPE_MISMATCH_ERROR_MESSAGE import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _} @@ -4328,10 +4328,7 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] { case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure => e.checkInputDataTypes() match { case TypeCheckResult.TypeCheckFailure(message) => -e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true) -e.failAnalysis( - s"cannot resolve '${e.sql}' due to data type mismatch: $message" + -extraHintForAnsiTypeCoercionExpression(plan)) +e.setTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE, message) } case _ => }) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9c72b9974c4..b0d1d6c2a30 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -50,6 +50,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { val DATA_TYPE_MISMATCH_ERROR = TreeNodeTag[Boolean]("dataTypeMismatchError") + val DATA_TYPE_MISMATCH_ERROR_MESSAGE = TreeNodeTag[String]("dataTypeMismatchError") + protected def
[spark] branch master updated: [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 89fdb8a6fb6 [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time 89fdb8a6fb6 is described below commit 89fdb8a6fb6a669c458891b3abeba236e64b1e89 Author: yangjie01 AuthorDate: Thu Jun 2 13:06:14 2022 +0300 [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time ### What changes were proposed in this pull request? After SPARK-38118, `dataTypeMismatchError` related to `Filter` will be checked and throw in `RemoveTempResolvedColumn`, this will cause compatibility issue with exception message presentation. For example, the following case: ``` spark.sql("create table t1(user_id int, auct_end_dt date) using parquet;") spark.sql("select * from t1 join t2 on t1.user_id = t2.user_id where t1.auct_end_dt >= Date_sub('2020-12-27', 90)").show ``` The expected message is ``` Table or view not found: t2 ``` But the actual message is ``` org.apache.spark.sql.AnalysisException: cannot resolve 'date_sub('2020-12-27', 90)' due to data type mismatch: argument 1 requires date type, however, ''2020-12-27'' is of string type.; line 1 pos 76 ``` For forward compatibility, this pr change to only records `DATA_TYPE_MISMATCH_ERROR_MESSAGE` in the `RemoveTempResolvedColumn` check process , and move `failAnalysis` to `CheckAnalysis#checkAnalysis` ### Why are the changes needed? Fix analysis exception message compatibility. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions and add a new test case Closes #36746 from LuciferYang/SPARK-39354. 
Authored-by: yangjie01 Signed-off-by: Max Gekk --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 ++- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 17 - .../spark/sql/catalyst/analysis/AnalysisSuite.scala | 16 ++-- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b13dede2acc..3017fc10dfd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -28,7 +28,7 @@ import scala.util.{Failure, Random, Success, Try} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR} +import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.DATA_TYPE_MISMATCH_ERROR_MESSAGE import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _} @@ -4361,10 +4361,7 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] { case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure => e.checkInputDataTypes() match { case TypeCheckResult.TypeCheckFailure(message) => -e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true) -e.failAnalysis( - s"cannot resolve '${e.sql}' due to data type mismatch: $message" + -extraHintForAnsiTypeCoercionExpression(plan)) +e.setTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE, message) } case _ => }) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 95b0226f00d..ed2e9ba2b6b 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -50,6 +50,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { val DATA_TYPE_MISMATCH_ERROR = TreeNodeTag[Boolean]("dataTypeMismatchError") + val DATA_TYPE_MISMATCH_ERROR_MESSAGE = TreeNodeTag[String]("dataTypeMismatchError") + protected def failAnalysis(msg: String): Nothing = { throw new AnalysisException(msg) } @@ -174,7 +176,20 @@ trait Ch
[spark] branch branch-3.3 updated (4bbaf3777e9 -> fef569507bc)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 4bbaf3777e9 [SPARK-38675][CORE] Fix race during unlock in BlockInfoManager add fef569507bc [SPARK-39346][SQL][3.3] Convert asserts/illegal state exception to internal errors on each phase No new revisions were added by this update. Summary of changes: .../sql/kafka010/KafkaMicroBatchSourceSuite.scala | 11 +--- .../main/scala/org/apache/spark/sql/Dataset.scala | 14 +++--- .../spark/sql/execution/QueryExecution.scala | 31 +- .../sql/execution/streaming/StreamExecution.scala | 4 ++- .../streaming/MicroBatchExecutionSuite.scala | 6 +++-- .../sql/streaming/continuous/ContinuousSuite.scala | 7 ++--- 6 files changed, 51 insertions(+), 22 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39346][SQL] Convert asserts/illegal state exception to internal errors on each phase
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 8894e785eda [SPARK-39346][SQL] Convert asserts/illegal state exception to internal errors on each phase 8894e785eda is described below commit 8894e785edae42a642351ad91e539324c39da8e4 Author: Max Gekk AuthorDate: Wed Jun 1 20:16:17 2022 +0300 [SPARK-39346][SQL] Convert asserts/illegal state exception to internal errors on each phase ### What changes were proposed in this pull request? In the PR, I propose to catch asserts/illegal state exception on each phase of query execution: ANALYSIS, OPTIMIZATION, PLANNING, and convert them to a SparkException w/ the `INTERNAL_ERROR` error class. ### Why are the changes needed? To improve user experience with Spark SQL and unify representation of user-facing errors. ### Does this PR introduce _any_ user-facing change? No. The changes might affect users in corner cases only. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *KafkaMicroBatchV1SourceSuite" $ build/sbt "test:testOnly *KafkaMicroBatchV2SourceSuite" ``` Closes #36704 from MaxGekk/wrapby-INTERNAL_ERROR-every-phase. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../sql/kafka010/KafkaMicroBatchSourceSuite.scala | 11 +--- .../main/scala/org/apache/spark/sql/Dataset.scala | 14 +++--- .../spark/sql/execution/QueryExecution.scala | 31 +- .../sql/execution/streaming/StreamExecution.scala | 4 ++- .../streaming/MicroBatchExecutionSuite.scala | 6 +++-- .../sql/streaming/continuous/ContinuousSuite.scala | 7 ++--- 6 files changed, 51 insertions(+), 22 deletions(-) diff --git a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 2396f31b954..0a32b1b54d0 100644 --- a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -34,6 +34,7 @@ import org.apache.kafka.common.TopicPartition import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.time.SpanSugar._ +import org.apache.spark.{SparkException, SparkThrowable} import org.apache.spark.sql.{Dataset, ForeachWriter, Row, SparkSession} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.read.streaming.SparkDataStream @@ -666,9 +667,10 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { testUtils.sendMessages(topic2, Array("6")) }, StartStream(), - ExpectFailure[IllegalStateException](e => { + ExpectFailure[SparkException](e => { +assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") // The offset of `topic2` should be changed from 2 to 1 -assert(e.getMessage.contains("was changed from 2 to 1")) +assert(e.getCause.getMessage.contains("was changed from 2 to 1")) }) ) } @@ -764,12 +766,13 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { testStream(df)( 
StartStream(checkpointLocation = metadataPath.getAbsolutePath), -ExpectFailure[IllegalStateException](e => { +ExpectFailure[SparkException](e => { + assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") Seq( s"maximum supported log version is v1, but encountered v9", "produced by a newer version of Spark and cannot be read by this version" ).foreach { message => -assert(e.toString.contains(message)) +assert(e.getCause.toString.contains(message)) } })) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index f00ebf51d6d..0a45cf92c6e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils -import org.apache.spark.{SparkException, SparkThrowable, TaskContext} +import org.apache.spark.TaskContext import org.apache.spark.annotation.{DeveloperApi, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.function._ @@ -3920,19 +3920,11 @@ class Dataset[
[spark] branch master updated (6d8efb515f6 -> 1e194d26ead)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 6d8efb515f6 [SPARK-39308][BUILD][SQL] Upgrade parquet to 1.12.3 add 1e194d26ead [SPARK-39305][SQL] Add the `EQUAL_NULL()` function No new revisions were added by this update. Summary of changes: .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../sql/catalyst/expressions/predicates.scala | 38 ++ .../sql-functions/sql-expression-schema.md | 1 + .../resources/sql-tests/inputs/null-handling.sql | 3 ++ .../sql-tests/results/null-handling.sql.out| 16 - 5 files changed, 58 insertions(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated (37a2416ca4c -> 6c4e07dbe38)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 37a2416ca4c [SPARK-39252][PYSPARK][TESTS] Remove flaky test_df_is_empty add 6c4e07dbe38 [SPARK-39255][SQL][3.3] Improve error messages No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 12 ++-- .../spark/sql/errors/QueryCompilationErrors.scala | 4 +- .../apache/spark/sql/errors/QueryErrorsBase.scala | 10 +++- .../spark/sql/errors/QueryExecutionErrors.scala| 2 +- .../apache/spark/sql/types/StructTypeSuite.scala | 22 --- .../resources/sql-tests/results/ansi/cast.sql.out | 68 +++--- .../resources/sql-tests/results/ansi/date.sql.out | 6 +- .../results/ansi/datetime-parsing-invalid.sql.out | 4 +- .../sql-tests/results/ansi/interval.sql.out| 20 +++ .../results/ansi/string-functions.sql.out | 8 +-- .../test/resources/sql-tests/results/pivot.sql.out | 2 +- .../sql-tests/results/postgreSQL/boolean.sql.out | 32 +- .../sql-tests/results/postgreSQL/float4.sql.out| 8 +-- .../sql-tests/results/postgreSQL/float8.sql.out| 8 +-- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out| 2 +- .../results/postgreSQL/window_part3.sql.out| 2 +- .../results/postgreSQL/window_part4.sql.out| 2 +- .../results/timestampNTZ/timestamp-ansi.sql.out| 2 +- .../sql-tests/results/udf/udf-pivot.sql.out| 2 +- .../spark/sql/connector/InsertIntoTests.scala | 4 +- 21 files changed, 117 insertions(+), 107 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54711 - in /dev/spark/v3.3.0-rc3-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.0/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: maxgekk Date: Tue May 24 14:09:25 2022 New Revision: 54711 Log: Apache Spark v3.3.0-rc3 docs [This commit notification would consist of 2650 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54708 - /dev/spark/v3.3.0-rc3-bin/
Author: maxgekk Date: Tue May 24 13:31:55 2022 New Revision: 54708 Log: Apache Spark v3.3.0-rc3 Added: dev/spark/v3.3.0-rc3-bin/ dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc3-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc3-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc3-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.asc Tue May 24 13:31:55 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKM3eYTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q08vEEADS36LrbaBB+bJomtt8GbqFzUXqTcwx +FLoymqs2ObRI9zKkY1w7QkIOWdssxlQQQuwKc0sFu3i77YTjLrcmRaxa/t5zvwmf +2fIcOqu1xFVhIUJbJ/IhLpGK1KlnbgQi2l+0iYLrB9u/VFceZmwGdLu/GBrnJ/e4 +3mNIOKGnPkMPhJi2eKPMLg161S5YMgBgcosRCCBeaxj37sR4RKQnJyYoo5mAE39B +yd1jcT8Q7KqJI6mLTI4d7zg8djnCn/2ZPFcrgfCnKZz4g1hoXVEzyF1xxg8vHq1B +7TDulhbBqzNABQDlKTe0xLUA0fW+0NiDy+ZG61TlqoZuBXGO0rSju2V9mnux6qw/ +hfHOuCh6pM4BG4694kV989UUt5YnVSNUyLSC5XHSQsqTgVydREtj0ETNQUSiN70y +qenMW4gtLEOLWgRsc7Lu0g7IsPgP43kh2llL8vOkXfQVVD2L73vWT6V1iTVFkpT3 +oW0AQ9fdiFgbT4q7nEcfxUm/uDFlzgSxD70QrV8oe8aZRCUeogp1cCZQedZEM+V8 +1qKCMM/5zVCeyrpRZpO9DJdKsIskpM6mIZElOvvo3EHZnf5FDt43KNtFXg+ogWWf +xiQoERy08pQKK+rgAMJZjA/6wL/SrDebmYrXD8WNBx54G1ZLaUurcWZqkVAZ3+Ts +lPEabYDmVZKovA== +=PIrx +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.sha512 Tue May 24 13:31:55 2022 @@ -0,0 +1 @@ +5a6b1460f360dd505009f4fac85f53c7e5e312c116734a4838713d420f74bff516799ca823c2f100f451aa80bc931d490a18b7e9c290b598dd0ef3e26e05f184 SparkR_3.3.0.tar.gz Added: dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz.asc Tue May 24 13:31:55 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKM3egTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q015wD/0UETD5HcWbKnScxbXEz8RvgCBFTmQs +weSyJk5VLRW4qXXG7Q5zALwY3GPemJC9aSALhKwUGOVMGcihQBC7THTohaaPgKAw +20pgaX4iBiSOoRaJANqxcNz1CceK/VKqH58YFlYpEbOJw0r8boCkd/pPXan2myl0 +w5hb+lE7Cw9DL8tl4W2IPmo4iNP4dI+u46yxd7Yj4W3I+jM2dHWHxIJiJl66nvDd +6EKmvYRqGEBfcwGES6CrngIyQD4lylJ/FORQE1vVZ0TiAaN+Hqn7k5mr9FETtl8L +HDFZRro+REB9Xz2h7d31ywWqvMnqWQ3VnKSeVBLLGfMcYP2pHxf2DJiCXaAZAtIy +RdbnxAK4wjZM3Qe0bJY6Wm0H8lJ8GiO+EQgKGmQjMKwgSSwn/dtHWvEqqX36p0G/ +vyWH46+9I8TxL+w5vNQMdRxXmAJbye8vdyUFBv5AO2hOM2UlXLvz76VElSSVzgi/ +FNhalZ47cVDmFh1B8EsljS8WnNnFxYNNfmwxW6ds2N6nIctMsD+cKRiRdCxh/EbO +gn7TonhvlFkEv5M4W5HmAALY1Jn/e9r/Ciy+uX4avEKiMZOvuwEkU57WSwjiUfLL +onxlwWxf+xXQG0ELMezPP8t3QylPziol38P6MMgsRg3HAVTtcaSGsISa1oSBYOdm +iTiGb1BmLrJ3uA== +=0Wz6 +-END PGP SIGNATURE- Added: dev/spark
[spark] branch master updated: [SPARK-39255][SQL] Improve error messages
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 625afb4e1ae [SPARK-39255][SQL] Improve error messages 625afb4e1ae is described below commit 625afb4e1aefda59191d79b31f8c94941aedde1e Author: Max Gekk AuthorDate: Tue May 24 14:15:38 2022 +0300 [SPARK-39255][SQL] Improve error messages ### What changes were proposed in this pull request? In the PR, I propose to improve errors of the following error classes: 1. NON_PARTITION_COLUMN - `a non-partition column name` -> `the non-partition column` 2. UNSUPPORTED_SAVE_MODE - `a not existent path` -> `a non existent path`. 3. INVALID_FIELD_NAME. Quote ids to follow the rules https://github.com/apache/spark/pull/36621. 4. FAILED_SET_ORIGINAL_PERMISSION_BACK. It is renamed to FAILED_PERMISSION_RESET_ORIGINAL. 5. NON_LITERAL_PIVOT_VALUES - Wrap error's expression by double quotes. The PR adds new helper method `toSQLExpr()` for that. 6. CAST_INVALID_INPUT - Add the recommendation: `... Correct the syntax for the value before casting it, or change the type to one appropriate for the value.` ### Why are the changes needed? To improve user experience with Spark SQL by making error message more clear. ### Does this PR introduce _any_ user-facing change? Yes, it changes user-facing error messages. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" $ build/sbt "sql/testOnly *QueryCompilationErrorsDSv2Suite" $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite" $ build/sbt "sql/testOnly *QueryExecutionAnsiErrorsSuite" $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite" $ build/sbt "sql/testOnly *QueryParsingErrorsSuite*" ``` Closes #36635 from MaxGekk/error-class-improve-msg-3. 
Lead-authored-by: Max Gekk Co-authored-by: Maxim Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 14 ++--- .../spark/sql/errors/QueryCompilationErrors.scala | 4 +- .../apache/spark/sql/errors/QueryErrorsBase.scala | 2 + .../spark/sql/errors/QueryExecutionErrors.scala| 2 +- .../apache/spark/sql/types/StructTypeSuite.scala | 22 --- .../resources/sql-tests/results/ansi/cast.sql.out | 68 +++--- .../resources/sql-tests/results/ansi/date.sql.out | 6 +- .../results/ansi/datetime-parsing-invalid.sql.out | 4 +- .../sql-tests/results/ansi/interval.sql.out| 20 +++ .../results/ansi/string-functions.sql.out | 8 +-- .../test/resources/sql-tests/results/pivot.sql.out | 2 +- .../sql-tests/results/postgreSQL/boolean.sql.out | 32 +- .../sql-tests/results/postgreSQL/float4.sql.out| 8 +-- .../sql-tests/results/postgreSQL/float8.sql.out| 8 +-- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out| 2 +- .../results/postgreSQL/window_part3.sql.out| 2 +- .../results/postgreSQL/window_part4.sql.out| 2 +- .../results/timestampNTZ/timestamp-ansi.sql.out| 2 +- .../sql-tests/results/udf/udf-pivot.sql.out| 2 +- .../errors/QueryCompilationErrorsDSv2Suite.scala | 4 +- .../sql/errors/QueryCompilationErrorsSuite.scala | 5 +- .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 3 +- .../sql/errors/QueryExecutionErrorsSuite.scala | 6 +- 24 files changed, 119 insertions(+), 113 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index eb328c6e20a..23f99524a7e 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -23,7 +23,7 @@ "message" : [ "Cannot up cast from to .\n" ] }, "CAST_INVALID_INPUT" : { -"message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." 
], +"message" : [ "The value of the type cannot be cast to because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], "sqlState" : "42000" }, "CAST_OVERFLOW" : { @@ -52,9 +52,6 @@ "message" : [ "Failed to rename to as destination already exists" ], "sqlState" : "22023" }, - "FAILED_SET_ORIGINAL_PERMISSION_BACK" : { -"message
[spark] branch branch-3.3 updated (459c4b0c94a -> d491e390ada)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 459c4b0c94a [SPARK-39144][SQL] Nested subquery expressions deduplicate relations should be done bottom up add a7259279d07 Preparing Spark release v3.3.0-rc3 new d491e390ada Preparing development version 3.3.1-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing development version 3.3.1-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit d491e390adaba04dc238868b7adc33251d880095 Author: Maxim Gekk AuthorDate: Tue May 24 10:15:35 2022 + Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87..0e449e841cf 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b..d12f2ad73fa 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e..842d63f5d38 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d75..f7d187bf952 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e7747..53f38df8851 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e..845f6659407 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370a..8e159089193 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3..1987c133285 100644
[spark] 01/01: Preparing Spark release v3.3.0-rc3
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to tag v3.3.0-rc3 in repository https://gitbox.apache.org/repos/asf/spark.git commit a7259279d07b302a51456adb13dc1e41a6fd06ed Author: Maxim Gekk AuthorDate: Tue May 24 10:15:29 2022 + Preparing Spark release v3.3.0-rc3 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf..9479bb3bf87 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fa..2e9c4d9960b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d38..2a9acfa335e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf952..7b17e625d75 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df8851..c5c920e7747 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407..697b5a3928e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e159089193..ad2db11370a 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c133285..1a7bdee70f3 100644 --- a/common/tags
[spark] tag v3.3.0-rc3 created (now a7259279d07)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to tag v3.3.0-rc3 in repository https://gitbox.apache.org/repos/asf/spark.git at a7259279d07 (commit) This tag includes the following new commits: new a7259279d07 Preparing Spark release v3.3.0-rc3 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38687][SQL] Use error classes in the compilation errors of generators
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 5d5e7f94315 [SPARK-38687][SQL] Use error classes in the compilation errors of generators 5d5e7f94315 is described below commit 5d5e7f94315c233d983139fa39163a838882be89 Author: panbingkun AuthorDate: Mon May 23 17:35:33 2022 +0300 [SPARK-38687][SQL] Use error classes in the compilation errors of generators ## What changes were proposed in this pull request? Migrate the following errors in QueryCompilationErrors onto use error classes: - nestedGeneratorError => UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS - moreThanOneGeneratorError => UNSUPPORTED_GENERATOR.MULTI_GENERATOR - generatorOutsideSelectError => UNSUPPORTED_GENERATOR.OUTSIDE_SELECT - generatorNotExpectedError => UNSUPPORTED_GENERATOR.NOT_GENERATOR ### Why are the changes needed? Porting compilation errors of generator to new error framework, improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36617 from panbingkun/SPARK-38687. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 17 ++ .../spark/sql/errors/QueryCompilationErrors.scala | 22 .../apache/spark/sql/errors/QueryErrorsBase.scala | 8 ++- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 23 .../apache/spark/sql/GeneratorFunctionSuite.scala | 9 ++-- .../sql/errors/QueryCompilationErrorsSuite.scala | 61 ++ 6 files changed, 111 insertions(+), 29 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index f6fba105872..eb328c6e20a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -295,6 +295,23 @@ }, "sqlState" : "0A000" }, + "UNSUPPORTED_GENERATOR" : { +"message" : [ "The generator is not supported: " ], +"subClass" : { + "MULTI_GENERATOR" : { +"message" : [ "only one generator allowed per clause but found : " ] + }, + "NESTED_IN_EXPRESSIONS" : { +"message" : [ "nested in expressions " ] + }, + "NOT_GENERATOR" : { +"message" : [ " is expected to be a generator. However, its class is , which is not a generator." 
] + }, + "OUTSIDE_SELECT" : { +"message" : [ "outside the SELECT clause, found: " ] + } +} + }, "UNSUPPORTED_GROUPING_EXPRESSION" : { "message" : [ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup" ] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 3d133d6cfab..008f13961a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeRef import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, Join, LogicalPlan, SerdeInfo, Window} import org.apache.spark.sql.catalyst.trees.{Origin, TreeNode} -import org.apache.spark.sql.catalyst.util.{toPrettySQL, FailFastMode, ParseMode, PermissiveMode} +import org.apache.spark.sql.catalyst.util.{FailFastMode, ParseMode, PermissiveMode} import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.functions.{BoundFunction, UnboundFunction} @@ -112,21 +112,19 @@ object QueryCompilationErrors extends QueryErrorsBase { } def nestedGeneratorError(trimmedNestedGenerator: Expression): Throwable = { -new AnalysisException( - "Generators are not supported when it's nested in " + -"expressions, but got: " + toPrettySQL(trimmedNestedGenerator)) +new AnalysisException(errorClass = "UNSUPPORTED_GENERATOR", + messageParameters = Array("NESTED_IN_EXPRESSIONS", toSQLExpr(trimmedNestedGenerator))) } def moreThanOneGeneratorError(generators: Seq[Expre
[spark] branch branch-3.3 updated: [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new fa400c666c4 [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages fa400c666c4 is described below commit fa400c666c41cf864103ba8705116a24092b3687 Author: Max Gekk AuthorDate: Sun May 22 18:58:25 2022 +0300 [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages ### What changes were proposed in this pull request? In the PR, I propose to describe the rules of quoting elements in error messages introduced by the PRs: - https://github.com/apache/spark/pull/36210 - https://github.com/apache/spark/pull/36233 - https://github.com/apache/spark/pull/36259 - https://github.com/apache/spark/pull/36324 - https://github.com/apache/spark/pull/36335 - https://github.com/apache/spark/pull/36359 - https://github.com/apache/spark/pull/36579 ### Why are the changes needed? To improve code maintenance, and the process of code review. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By existing GAs. Closes #36621 from MaxGekk/update-error-class-guide. 
Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 2a4d8a4ea709339175257027e31a75bdeed5daec) Signed-off-by: Max Gekk --- .../org/apache/spark/sql/errors/QueryErrorsBase.scala | 17 + 1 file changed, 17 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index 89bc1039e73..52ffa6d32fd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -23,6 +23,23 @@ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.types.{DataType, DoubleType, FloatType} +/** + * The trait exposes util methods for preparing error messages such as quoting of error elements. + * All classes that extend `QueryErrorsBase` shall follow the rules: + * 1. Any values shall be outputted in the SQL standard style by using `toSQLValue()`. + * For example: 'a string value', 1, NULL. + * 2. SQL types shall be double quoted and outputted in the upper case using `toSQLType()`. + * For example: "INT", "DECIMAL(10,0)". + * 3. Elements of identifiers shall be wrapped by backticks by using `toSQLId()`. + * For example: `namespaceA`.`funcB`, `tableC`. + * 4. SQL statements shall be in the upper case prepared by using `toSQLStmt`. + * For example: DESC PARTITION, DROP TEMPORARY FUNCTION. + * 5. SQL configs and datasource options shall be wrapped by double quotes by using + * `toSQLConf()`/`toDSOption()`. + * For example: "spark.sql.ansi.enabled". + * 6. Any values of datasource options or SQL configs shall be double quoted. + * For example: "true", "CORRECTED". 
+ */ trait QueryErrorsBase { // Converts an error class parameter to its SQL representation def toSQLValue(v: Any, t: DataType): String = Literal.create(v, t) match { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 2a4d8a4ea70 [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages 2a4d8a4ea70 is described below commit 2a4d8a4ea709339175257027e31a75bdeed5daec Author: Max Gekk AuthorDate: Sun May 22 18:58:25 2022 +0300 [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages ### What changes were proposed in this pull request? In the PR, I propose to describe the rules of quoting elements in error messages introduced by the PRs: - https://github.com/apache/spark/pull/36210 - https://github.com/apache/spark/pull/36233 - https://github.com/apache/spark/pull/36259 - https://github.com/apache/spark/pull/36324 - https://github.com/apache/spark/pull/36335 - https://github.com/apache/spark/pull/36359 - https://github.com/apache/spark/pull/36579 ### Why are the changes needed? To improve code maintenance, and the process of code review. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By existing GAs. Closes #36621 from MaxGekk/update-error-class-guide. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../org/apache/spark/sql/errors/QueryErrorsBase.scala | 17 + 1 file changed, 17 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index ab1f8c57480..81c4d0ac408 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -23,6 +23,23 @@ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.types.{DataType, DoubleType, FloatType} +/** + * The trait exposes util methods for preparing error messages such as quoting of error elements. + * All classes that extend `QueryErrorsBase` shall follow the rules: + * 1. Any values shall be outputted in the SQL standard style by using `toSQLValue()`. + * For example: 'a string value', 1, NULL. + * 2. SQL types shall be double quoted and outputted in the upper case using `toSQLType()`. + * For example: "INT", "DECIMAL(10,0)". + * 3. Elements of identifiers shall be wrapped by backticks by using `toSQLId()`. + * For example: `namespaceA`.`funcB`, `tableC`. + * 4. SQL statements shall be in the upper case prepared by using `toSQLStmt`. + * For example: DESC PARTITION, DROP TEMPORARY FUNCTION. + * 5. SQL configs and datasource options shall be wrapped by double quotes by using + * `toSQLConf()`/`toDSOption()`. + * For example: "spark.sql.ansi.enabled". + * 6. Any values of datasource options or SQL configs shall be double quoted. + * For example: "true", "CORRECTED". 
+ */ trait QueryErrorsBase { // Converts an error class parameter to its SQL representation def toSQLValue(v: Any, t: DataType): String = Literal.create(v, t) match { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39167][SQL] Throw an exception w/ an error class for multiple rows from a subquery used as an expression
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 49562f41678 [SPARK-39167][SQL] Throw an exception w/ an error class for multiple rows from a subquery used as an expression 49562f41678 is described below commit 49562f416788cab05b3f82a2471a1f2f6561a1d8 Author: panbingkun AuthorDate: Sat May 21 07:50:59 2022 +0300 [SPARK-39167][SQL] Throw an exception w/ an error class for multiple rows from a subquery used as an expression ### What changes were proposed in this pull request? In the PR, I propose to use the MULTI_VALUE_SUBQUERY_ERROR error class for multiple rows from a subquery used as an expression. ### Why are the changes needed? Porting the execution errors for multiple rows from a subquery used as an expression to the new error framework should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added new test suite Closes #36580 from panbingkun/SPARK-39167. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 3 +++ .../spark/sql/errors/QueryExecutionErrors.scala| 5 .../org/apache/spark/sql/execution/subquery.scala | 5 ++-- .../scala/org/apache/spark/sql/SubquerySuite.scala | 11 - .../sql/errors/QueryExecutionErrorsSuite.scala | 27 ++ 5 files changed, 37 insertions(+), 14 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 1a139c018e8..f6fba105872 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -160,6 +160,9 @@ "MULTI_UDF_INTERFACE_ERROR" : { "message" : [ "Not allowed to implement multiple UDF interfaces, UDF class " ] }, + "MULTI_VALUE_SUBQUERY_ERROR" : { +"message" : [ "more than one row returned by a subquery used as an expression: " ] + }, "NON_LITERAL_PIVOT_VALUES" : { "message" : [ "Literal expressions required for pivot values, found ''" ], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 1e664100545..f79b30f0d0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2005,4 +2005,9 @@ object QueryExecutionErrors extends QueryErrorsBase { new SparkException(errorClass = "INVALID_BUCKET_FILE", messageParameters = Array(path), cause = null) } + + def multipleRowSubqueryError(plan: String): Throwable = { +new SparkException( + errorClass = "MULTI_VALUE_SUBQUERY_ERROR", messageParameters = Array(plan), cause = null) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 209b0f79243..c6f5983f243 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.{LeafLike, UnaryLike} import org.apache.spark.sql.catalyst.trees.TreePattern._ +import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, DataType} @@ -79,9 +80,7 @@ case class ScalarSubquery( def updateResult(): Unit = { val rows = plan.executeCollect() if (rows.length > 1) { - // TODO(SPARK-39167): Throw an exception w/ an error class for multiple rows from a subquery - throw new IllegalStateException( -s"more than one row returned by a subquery used as an expression:\n$plan") + throw QueryExecutionErrors.multipleRowSubqueryError(plan.toString) } if (rows.length == 1) { assert(rows(0).numFields == 1, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 396fca47634..500913fb289 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql import sc
[spark] branch master updated: [SPARK-39213][SQL] Create ANY_VALUE aggregate function
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new efc1e8ac8bc [SPARK-39213][SQL] Create ANY_VALUE aggregate function efc1e8ac8bc is described below commit efc1e8ac8bc61872601ac2244629a9d54f8889fb Author: Vitalii Li AuthorDate: Fri May 20 22:28:18 2022 +0300 [SPARK-39213][SQL] Create ANY_VALUE aggregate function ### What changes were proposed in this pull request? Adding implementation for ANY_VALUE aggregate function. During optimization stage it is rewritten to `First` aggregate function. ### Why are the changes needed? This feature provides feature parity with popular DBs and DWHs ### Does this PR introduce _any_ user-facing change? Yes - introducing new aggregate function `ANY_VALUE`. Respective documentation is updated. ### How was this patch tested? Unit tests Closes #36584 from vli-databricks/SPARK-39213. 
Authored-by: Vitalii Li Signed-off-by: Max Gekk --- docs/sql-ref-ansi-compliance.md| 1 + .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 + .../spark/sql/catalyst/parser/SqlBaseParser.g4 | 3 + .../spark/sql/catalyst/analysis/Analyzer.scala | 1 + .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../catalyst/expressions/aggregate/AnyValue.scala | 64 +++ .../spark/sql/catalyst/parser/AstBuilder.scala | 10 +- .../spark/sql/catalyst/SQLKeywordSuite.scala | 2 +- .../expressions/aggregate/FirstLastTestSuite.scala | 4 + .../sql-functions/sql-expression-schema.md | 1 + .../resources/sql-tests/inputs/udf/udf-window.sql | 8 +- .../src/test/resources/sql-tests/inputs/window.sql | 29 +- .../sql-tests/results/udf/udf-window.sql.out | 46 +- .../resources/sql-tests/results/window.sql.out | 574 +++-- 14 files changed, 446 insertions(+), 299 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 257f53caef1..bb55cec52f5 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -346,6 +346,7 @@ Below is a list of all the keywords in Spark SQL. 
|AND|reserved|non-reserved|reserved| |ANTI|non-reserved|strict-non-reserved|non-reserved| |ANY|reserved|non-reserved|reserved| +|ANY_VALUE|non-reserved|non-reserved|non-reserved| |ARCHIVE|non-reserved|non-reserved|non-reserved| |ARRAY|non-reserved|non-reserved|reserved| |AS|reserved|non-reserved|reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index fac87c62de0..1cbd6d24dea 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -95,6 +95,7 @@ ANALYZE: 'ANALYZE'; AND: 'AND'; ANTI: 'ANTI'; ANY: 'ANY'; +ANY_VALUE: 'ANY_VALUE'; ARCHIVE: 'ARCHIVE'; ARRAY: 'ARRAY'; AS: 'AS'; diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index ed57e9062c1..ce37a09d5ba 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -824,6 +824,7 @@ primaryExpression | name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast | STRUCT LEFT_PAREN (argument+=namedExpression (COMMA argument+=namedExpression)*)? RIGHT_PAREN #struct | FIRST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #first +| ANY_VALUE LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #any_value | LAST LEFT_PAREN expression (IGNORE NULLS)? 
RIGHT_PAREN #last | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN#position | constant #constantDefault @@ -1072,6 +1073,7 @@ ansiNonReserved | ALTER | ANALYZE | ANTI +| ANY_VALUE | ARCHIVE | ARRAY | ASC @@ -1314,6 +1316,7 @@ nonReserved | ANALYZE | AND | ANY +| ANY_VALUE | ARCHIVE | ARRAY | AS diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4dd2081c67f..c5bee6f55fe 100644 --- a/sql/cata
[spark] branch master updated: [SPARK-39163][SQL] Throw an exception w/ error class for an invalid bucket file
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c6dccc7dd41 [SPARK-39163][SQL] Throw an exception w/ error class for an invalid bucket file c6dccc7dd41 is described below commit c6dccc7dd412a95007f5bb2584d69b85ff9ebf8e Author: panbingkun AuthorDate: Thu May 19 20:39:35 2022 +0300 [SPARK-39163][SQL] Throw an exception w/ error class for an invalid bucket file ### What changes were proposed in this pull request? In the PR, I propose to use the INVALID_BUCKET_FILE error class for an invalid bucket file. ### Why are the changes needed? Porting the execution errors for an invalid bucket file to the new error framework should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #36603 from panbingkun/SPARK-39163. Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 3 +++ .../spark/sql/errors/QueryExecutionErrors.scala| 5 .../spark/sql/execution/DataSourceScanExec.scala | 4 ++-- .../sql/errors/QueryExecutionErrorsSuite.scala | 28 -- .../adaptive/AdaptiveQueryExecSuite.scala | 6 ++--- .../spark/sql/sources/BucketedReadSuite.scala | 23 -- 6 files changed, 38 insertions(+), 31 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e4ee09ea8a7..1a139c018e8 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -115,6 +115,9 @@ "INVALID_ARRAY_INDEX_IN_ELEMENT_AT" : { "message" : [ "The index is out of bounds. The array has elements. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error." 
] }, + "INVALID_BUCKET_FILE" : { +"message" : [ "Invalid bucket file: " ] + }, "INVALID_FIELD_NAME" : { "message" : [ "Field name is invalid: is not a struct." ], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index a155b0694b5..1e664100545 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2000,4 +2000,9 @@ object QueryExecutionErrors extends QueryErrorsBase { s"add ${toSQLValue(amount, IntegerType)} $unit to " + s"${toSQLValue(DateTimeUtils.microsToInstant(micros), TimestampType)}")) } + + def invalidBucketFile(path: String): Throwable = { +new SparkException(errorClass = "INVALID_BUCKET_FILE", messageParameters = Array(path), + cause = null) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index f7b627cef08..f5d349d975f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, UnknownPartitioning} import org.apache.spark.sql.catalyst.util.{truncatedString, CaseInsensitiveMap} +import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat => ParquetSource} import org.apache.spark.sql.execution.datasources.v2.PushedDownOperators @@ -618,8 +619,7 @@ case class FileSourceScanExec( }.groupBy { f => 
BucketingUtils .getBucketId(new Path(f.filePath).getName) - // TODO(SPARK-39163): Throw an exception w/ error class for an invalid bucket file - .getOrElse(throw new IllegalStateException(s"Invalid bucket file ${f.filePath}")) + .getOrElse(throw QueryExecutionErrors.invalidBucketFile(f.filePath)) } val prunedFilesGroupedToBuckets = if (optionalBucketSet.isDefined) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala inde
[spark] branch master updated: [SPARK-39234][SQL] Code clean up in SparkThrowableHelper.getMessage
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new ebd916b0054 [SPARK-39234][SQL] Code clean up in SparkThrowableHelper.getMessage ebd916b0054 is described below commit ebd916b005499c724bbec54b3df85cd28a864e03 Author: Gengliang Wang AuthorDate: Thu May 19 19:13:15 2022 +0300 [SPARK-39234][SQL] Code clean up in SparkThrowableHelper.getMessage ### What changes were proposed in this pull request? 1. Remove the starting "\n" in `Origin.context`. The "\n" will be append in the method `SparkThrowableHelper.getMessage` instead. 2. Code clean up the method SparkThrowableHelper.getMessage to eliminate redundant code. ### Why are the changes needed? Code clean up to eliminate redundant code. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #36612 from gengliangwang/moveNewLine. 
Authored-by: Gengliang Wang Signed-off-by: Max Gekk --- .../src/main/scala/org/apache/spark/ErrorInfo.scala | 21 + .../apache/spark/sql/catalyst/trees/TreeNode.scala | 2 +- .../spark/sql/catalyst/trees/TreeNodeSuite.scala| 3 +-- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ErrorInfo.scala b/core/src/main/scala/org/apache/spark/ErrorInfo.scala index e11e6485851..4639e56aa50 100644 --- a/core/src/main/scala/org/apache/spark/ErrorInfo.scala +++ b/core/src/main/scala/org/apache/spark/ErrorInfo.scala @@ -77,20 +77,25 @@ private[spark] object SparkThrowableHelper { queryContext: String = ""): String = { val errorInfo = errorClassToInfoMap.getOrElse(errorClass, throw new IllegalArgumentException(s"Cannot find error class '$errorClass'")) -if (errorInfo.subClass.isDefined) { +val (displayClass, displayMessageParameters, displayFormat) = if (errorInfo.subClass.isEmpty) { + (errorClass, messageParameters, errorInfo.messageFormat) +} else { val subClass = errorInfo.subClass.get val subErrorClass = messageParameters.head val errorSubInfo = subClass.getOrElse(subErrorClass, throw new IllegalArgumentException(s"Cannot find sub error class '$subErrorClass'")) - val subMessageParameters = messageParameters.tail - "[" + errorClass + "." + subErrorClass + "] " + String.format((errorInfo.messageFormat + -errorSubInfo.messageFormat).replaceAll("<[a-zA-Z0-9_-]+>", "%s"), -subMessageParameters: _*) + queryContext + (errorClass + "." 
+ subErrorClass, messageParameters.tail, +errorInfo.messageFormat + errorSubInfo.messageFormat) +} +val displayMessage = String.format( + displayFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), + displayMessageParameters : _*) +val displayQueryContext = if (queryContext.isEmpty) { + "" } else { - "[" + errorClass + "] " + String.format( -errorInfo.messageFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), -messageParameters: _*) + queryContext + s"\n$queryContext" } +s"[$displayClass] $displayMessage$displayQueryContext" } def getSqlState(errorClass: String): String = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 0714898e19d..54c64515ee4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -89,7 +89,7 @@ case class Origin( "" } val builder = new StringBuilder - builder ++= s"\n== SQL$objectContext$positionContext ==\n" + builder ++= s"== SQL$objectContext$positionContext ==\n" val text = sqlText.get val start = math.max(startIndex.get, 0) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index ffbc5d89bdb..899a740bdae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -876,8 +876,7 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper { objectType = Some("VIEW"), objectName = Some("some_view&
[spark] branch master updated: [SPARK-37939][SQL] Use error classes in the parsing errors of properties
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 7309e76d8b9 [SPARK-37939][SQL] Use error classes in the parsing errors of properties 7309e76d8b9 is described below commit 7309e76d8b95e306d6f3d2f611316b748949e9cf Author: panbingkun AuthorDate: Thu May 19 11:29:37 2022 +0300 [SPARK-37939][SQL] Use error classes in the parsing errors of properties ## What changes were proposed in this pull request? Migrate the following errors in QueryParsingErrors onto use error classes: - cannotCleanReservedNamespacePropertyError => UNSUPPORTED_FEATURE.SET_NAMESPACE_PROPERTY - cannotCleanReservedTablePropertyError => UNSUPPORTED_FEATURE.SET_TABLE_PROPERTY - invalidPropertyKeyForSetQuotedConfigurationError => INVALID_PROPERTY_KEY - invalidPropertyValueForSetQuotedConfigurationError => INVALID_PROPERTY_VALUE - propertiesAndDbPropertiesBothSpecifiedError => UNSUPPORTED_FEATURE.SET_PROPERTIES_AND_DBPROPERTIES ### Why are the changes needed? Porting parsing errors of partitions to new error framework, improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryParsingErrorsSuite*" ``` Closes #36561 from panbingkun/SPARK-37939. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 15 .../spark/sql/errors/QueryParsingErrors.scala | 28 +-- .../spark/sql/errors/QueryParsingErrorsSuite.scala | 88 ++ .../spark/sql/execution/SparkSqlParserSuite.scala | 6 +- .../command/CreateNamespaceParserSuite.scala | 3 +- 5 files changed, 129 insertions(+), 11 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 21fde82adbb..e4ee09ea8a7 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -133,6 +133,12 @@ "message" : [ "The value of parameter(s) '' in is invalid: " ], "sqlState" : "22023" }, + "INVALID_PROPERTY_KEY" : { +"message" : [ " is an invalid property key, please use quotes, e.g. SET =" ] + }, + "INVALID_PROPERTY_VALUE" : { +"message" : [ " is an invalid property value, please use quotes, e.g. SET =" ] + }, "INVALID_SQL_SYNTAX" : { "message" : [ "Invalid SQL syntax: " ], "sqlState" : "42000" @@ -262,6 +268,15 @@ "REPEATED_PIVOT" : { "message" : [ "Repeated PIVOT operation." ] }, + "SET_NAMESPACE_PROPERTY" : { +"message" : [ " is a reserved namespace property, ." ] + }, + "SET_PROPERTIES_AND_DBPROPERTIES" : { +"message" : [ "set PROPERTIES and DBPROPERTIES at the same time." ] + }, + "SET_TABLE_PROPERTY" : { +"message" : [ " is a reserved table property, ." ] + }, "TOO_MANY_TYPE_ARGUMENTS_FOR_UDF_CLASS" : { "message" : [ "UDF class with type arguments." 
] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index debfe1b0891..8fa28c0d347 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -267,16 +267,26 @@ object QueryParsingErrors extends QueryErrorsBase { def cannotCleanReservedNamespacePropertyError( property: String, ctx: ParserRuleContext, msg: String): Throwable = { -new ParseException(s"$property is a reserved namespace property, $msg.", ctx) +new ParseException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array("SET_NAMESPACE_PROPERTY", property, msg), + ctx) } def propertiesAndDbPropertiesBothSpecifiedError(ctx: CreateNamespaceContext): Throwable = { -new ParseException("Either PROPERTIES or DBPROPERTIES is allowed.", ctx) +new ParseException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array("SET_PROPERTIES_AND_DBPROPERTIES"), + ctx +) } def cannotCleanReservedTablePropertyError( property: String, ctx:
[spark] branch master updated: [SPARK-39229][SQL] Separate query contexts from error-classes.json
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 3c74aed2cbd [SPARK-39229][SQL] Separate query contexts from error-classes.json 3c74aed2cbd is described below commit 3c74aed2cbde2968fab93b2799a56d075420e7d3 Author: Gengliang Wang AuthorDate: Thu May 19 11:00:16 2022 +0300 [SPARK-39229][SQL] Separate query contexts from error-classes.json ### What changes were proposed in this pull request? Separate query contexts for runtime errors from error-classes.json. ### Why are the changes needed? The message is JSON should only contain parameters explicitly thrown. It is more elegant to separate query contexts from error-classes.json. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #36604 from gengliangwang/refactorErrorClass. Authored-by: Gengliang Wang Signed-off-by: Max Gekk --- .../apache/spark/memory/SparkOutOfMemoryError.java | 2 +- core/src/main/resources/error/error-classes.json | 10 +++ .../main/scala/org/apache/spark/ErrorInfo.scala| 9 -- .../scala/org/apache/spark/SparkException.scala| 34 +++--- .../org/apache/spark/SparkThrowableSuite.scala | 2 +- .../spark/sql/errors/QueryExecutionErrors.scala| 29 +- 6 files changed, 53 insertions(+), 33 deletions(-) diff --git a/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java b/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java index 22dfe4d4dbe..c5f19a0c201 100644 --- a/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java +++ b/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java @@ -39,7 +39,7 @@ public final class SparkOutOfMemoryError extends OutOfMemoryError implements Spa } public SparkOutOfMemoryError(String errorClass, String[] messageParameters) { -super(SparkThrowableHelper.getMessage(errorClass, 
messageParameters)); +super(SparkThrowableHelper.getMessage(errorClass, messageParameters, "")); this.errorClass = errorClass; this.messageParameters = messageParameters; } diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index f4eadd4a368..21fde82adbb 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -4,7 +4,7 @@ "sqlState" : "42000" }, "ARITHMETIC_OVERFLOW" : { -"message" : [ ". If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], +"message" : [ ". If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], "sqlState" : "22003" }, "CANNOT_CAST_DATATYPE" : { @@ -12,7 +12,7 @@ "sqlState" : "22005" }, "CANNOT_CHANGE_DECIMAL_PRECISION" : { -"message" : [ " cannot be represented as Decimal(, ). If necessary set to \"false\" to bypass this error." ], +"message" : [ " cannot be represented as Decimal(, ). If necessary set to \"false\" to bypass this error." ], "sqlState" : "22005" }, "CANNOT_PARSE_DECIMAL" : { @@ -23,7 +23,7 @@ "message" : [ "Cannot up cast from to .\n" ] }, "CAST_INVALID_INPUT" : { -"message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], +"message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], "sqlState" : "42000" }, "CAST_OVERFLOW" : { @@ -38,7 +38,7 @@ "sqlState" : "22008" }, "DIVIDE_BY_ZERO" : { -"message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], +"message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to \"false\" (except for ANSI interval type) to bypass this error." 
], "sqlState" : "22012" }, "DUPLICATE_KEY" : { @@ -138,7 +138,7 @@ "sqlState" : "42000" }, &
[spark] branch branch-3.3 updated (b5ce32f41f9 -> 47c47b6e864)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from b5ce32f41f9 [SPARK-39162][SQL][3.3] Jdbc dialect should decide which function could be pushed down add 47c47b6e864 [SPARK-39214][SQL][3.3] Improve errors related to CAST No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 12 +-- .../spark/sql/catalyst/expressions/Cast.scala | 8 +- .../spark/sql/catalyst/util/DateTimeUtils.scala| 8 +- .../spark/sql/catalyst/util/UTF8StringUtils.scala | 2 +- .../spark/sql/errors/QueryExecutionErrors.scala| 62 .../scala/org/apache/spark/sql/types/Decimal.scala | 2 +- .../catalyst/expressions/AnsiCastSuiteBase.scala | 104 + .../spark/sql/catalyst/expressions/CastSuite.scala | 70 +++--- .../sql/catalyst/util/DateFormatterSuite.scala | 2 +- .../catalyst/util/TimestampFormatterSuite.scala| 3 +- .../org/apache/spark/sql/types/DecimalSuite.scala | 4 +- .../resources/sql-tests/results/ansi/cast.sql.out | 82 .../resources/sql-tests/results/ansi/date.sql.out | 8 +- .../results/ansi/datetime-parsing-invalid.sql.out | 8 +- .../sql-tests/results/ansi/interval.sql.out| 28 +++--- .../results/ansi/string-functions.sql.out | 8 +- .../sql-tests/results/postgreSQL/boolean.sql.out | 62 ++-- .../sql-tests/results/postgreSQL/float4.sql.out| 14 +-- .../sql-tests/results/postgreSQL/float8.sql.out| 10 +- .../sql-tests/results/postgreSQL/int8.sql.out | 8 +- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out| 2 +- .../results/postgreSQL/window_part3.sql.out| 2 +- .../results/postgreSQL/window_part4.sql.out| 2 +- .../results/timestampNTZ/timestamp-ansi.sql.out| 4 +- .../org/apache/spark/sql/SQLInsertTestSuite.scala | 3 +- .../org/apache/spark/sql/sources/InsertSuite.scala | 12 ++- 27 files changed, 294 insertions(+), 240 deletions(-) - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (30cf796bdb0 -> 96f4b7dbc1f)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 30cf796bdb0 [BUILD] When building spark project, remove spark-tags-tests.jar from… add 96f4b7dbc1f [SPARK-39212][SQL] Use double quotes for values of SQL configs/DS options in error messages No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 30 +- .../org/apache/spark/SparkThrowableSuite.scala | 2 +- .../apache/spark/sql/errors/QueryErrorsBase.scala | 4 ++ .../spark/sql/errors/QueryExecutionErrors.scala| 12 ++-- .../resources/sql-tests/results/ansi/array.sql.out | 24 .../resources/sql-tests/results/ansi/cast.sql.out | 70 +++--- .../resources/sql-tests/results/ansi/date.sql.out | 12 ++-- .../results/ansi/datetime-parsing-invalid.sql.out | 20 +++ .../ansi/decimalArithmeticOperations.sql.out | 8 +-- .../sql-tests/results/ansi/interval.sql.out| 40 ++--- .../resources/sql-tests/results/ansi/map.sql.out | 8 +-- .../results/ansi/string-functions.sql.out | 8 +-- .../sql-tests/results/ansi/timestamp.sql.out | 14 ++--- .../test/resources/sql-tests/results/date.sql.out | 6 +- .../results/datetime-formatting-invalid.sql.out| 44 +++--- .../results/datetime-parsing-invalid.sql.out | 16 ++--- .../resources/sql-tests/results/interval.sql.out | 18 +++--- .../sql-tests/results/json-functions.sql.out | 4 +- .../sql-tests/results/postgreSQL/boolean.sql.out | 32 +- .../sql-tests/results/postgreSQL/float4.sql.out| 14 ++--- .../sql-tests/results/postgreSQL/float8.sql.out| 10 ++-- .../sql-tests/results/postgreSQL/int4.sql.out | 12 ++-- .../sql-tests/results/postgreSQL/int8.sql.out | 22 +++ .../results/postgreSQL/select_having.sql.out | 2 +- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out| 6 +- .../results/postgreSQL/window_part3.sql.out| 2 +- .../results/postgreSQL/window_part4.sql.out| 2 +- 
.../resources/sql-tests/results/timestamp.sql.out | 12 ++-- .../results/timestampNTZ/timestamp-ansi.sql.out| 6 +- .../results/timestampNTZ/timestamp.sql.out | 2 +- .../native/stringCastAndExpressions.sql.out| 6 +- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- .../sql/errors/QueryCompilationErrorsSuite.scala | 2 +- .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 18 +++--- .../sql/errors/QueryExecutionErrorsSuite.scala | 12 ++-- 36 files changed, 257 insertions(+), 249 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54558 - in /dev/spark/v3.3.0-rc2-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.0/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: maxgekk Date: Mon May 16 09:33:34 2022 New Revision: 54558 Log: Apache Spark v3.3.0-rc2 docs [This commit notification would consist of 2650 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54555 - /dev/spark/v3.3.0-rc2-bin/
Author: maxgekk Date: Mon May 16 08:58:23 2022 New Revision: 54555 Log: Apache Spark v3.3.0-rc2 Added: dev/spark/v3.3.0-rc2-bin/ dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc2-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc2-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc2-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.asc Mon May 16 08:58:23 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKCEcwTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q0/ZYEACat+N6zgwj76NfgHJJcbtBc5mhIw7y +G1UOmOTZAkKp1Q0J9pXCmhRdC5jhJnIDCXhFEvLAUDTS8HpmnTyFAs2kCSxZazn7 +AIkqWoX3VYAYa3OKcvnKjosOwuI5FqI9RDmKCi9Al53eSfY5W7D/sgAdKtfRFPom +F3F/piqTr1z0OdaWvcsNh4VCO/gFNw8SrA9npNxtMoRsRgEe6PaOEruHGDjzVBBD +nYxC+9NbIH24y+hfVR1aP4o7uv3n+th7s+kHnfNXcORz1bf6udFB2iNlgavQRbsl +mehJcLcYCcEaWa3QVChK6fFUyiowFtswKqJEj/vP4SOf7uRICEOI5eA8SrNCG1FT +4ftApO/yqQeeRCMfZziflWEQJN0ZQSkAs0MAADkxeOTaQqYwXIBAT9Vl90Kjj2mE +sux18CTfj505k3DzN4T60DSA4bcUpaaRWgH+CatRskwomHzfVvB3EsM+Os2Kcl4O +WXrv90VCyIDIZWqb6UKkIEqqvTsx5TVec7jJk63vp0TX+toszfap49Gim3HDpUIw +v9q8EiYxJD1MPgM5WLW8RNX9gD7sDh46DxflZFovtyWq3j96k++Nku3ehE3tq/Md +pPo+9qBgOkZFtyV4U6FXrnwygoH36YoFHgBcRSLr2J6v3mcHOBSsYuHQkGhEIk7B +8YdA3Sa7fiDvTA== +=t6GN +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.sha512 Mon May 16 08:58:23 2022 @@ -0,0 +1,3 @@ +SparkR_3.3.0.tar.gz: EFB28305 D8B97FB0 50F0EF30 680C259D AA5039E1 99F1EC09 + 08FAC78A B5E41D91 C8621784 CEB78BC2 BCDCAE25 915BAB0C + B8E1AF82 05007C36 997F48F0 FD933E4C Added: dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz.asc Mon May 16 08:58:23 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKCEc4THG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q0336D/9FbAXNRXFgl/I9YoHFS5Ci4fvZJN9F +TR2UGv6zX6T5oEEZfAwYHOvf/4hp7Ob4Oy8Yyor5DjTJGizkpGCa9hLJ+PbOpKBK +HuVeLjG/gS79euEeMYkHmyDWEnvwB96dH1FfbM/H/9bejBwNBaYxsW0G3TqJRSmC +oyka0xgAK4e2CDPB9Ks/j59qn0NobyhtLeJCdgXDW/TX/yPWs0NO4zKpmWXiozct +3Yb6OTa5TOPUNjehpYQxh5yOgzLRsNgNQYindil48nQO9cK0t0L6v7Rhs8YN3LAC +oqdWkU97eQLm0e/L7QuThH6oSUKZg65PfaRN7Z2P8isoo+pZxXfmnSkn81VmiFUz +y2e9Goe03k15IOEi3PWmh/ypBotgNAz4eKGHUFrbWb4VszH5uaf8HapvYihfMw/0 +HFPFqtuvxDmq6ySppAAfZ+cEnQ13+2OaTZkS9m0LxQOWtOTkZQMedeoAIxNnglXR +gir73fiN4KQ/QmaM/TLiFGjEgtRwFKjaCvrL6H9Ocb0/ijsi2paVn+AlGdsSoc39 +7ujWAi2STLe5By9+GObliZhkWzxoiQPY06xGuzLhXrSaa1PLL/oeqaB15+hh02jE +gmyIqgHyqwDPHHL3kis4qQ4ylpMSCVTI4OcScVOOzg3/YD69rQV45SJv2+/9RyJt +mmh9rbTmwSSEPQ
[spark] branch branch-3.3 updated: [SPARK-39187][SQL][3.3] Remove `SparkIllegalStateException`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 1853eb117e2 [SPARK-39187][SQL][3.3] Remove `SparkIllegalStateException` 1853eb117e2 is described below commit 1853eb117e24bcc0509d275c4caca6c033bf0ab9 Author: Max Gekk AuthorDate: Mon May 16 11:39:37 2022 +0300 [SPARK-39187][SQL][3.3] Remove `SparkIllegalStateException` ### What changes were proposed in this pull request? Remove `SparkIllegalStateException` and replace it by `IllegalStateException` where it was used. This is a backport of https://github.com/apache/spark/pull/36550. ### Why are the changes needed? To improve code maintenance and be consistent to other places where `IllegalStateException` is used in illegal states (for instance, see https://github.com/apache/spark/pull/36524). After the PR https://github.com/apache/spark/pull/36500, the exception is substituted by `SparkException` w/ the `INTERNAL_ERROR` error class. ### Does this PR introduce _any_ user-facing change? No. Users shouldn't face to the exception in regular cases. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "sql/test:testOnly *QueryExecutionErrorsSuite*" $ build/sbt "test:testOnly *ArrowUtilsSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 1a90512f605c490255f7b38215c207e64621475b) Signed-off-by: Max Gekk Closes #36558 from MaxGekk/remove-SparkIllegalStateException-3.3. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/scala/org/apache/spark/SparkException.scala | 12 .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 6 +++--- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 11 +++ .../main/scala/org/apache/spark/sql/util/ArrowUtils.scala | 9 +++-- .../scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala | 4 ++-- .../spark/sql/errors/QueryExecutionErrorsSuite.scala | 14 -- 6 files changed, 11 insertions(+), 45 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index 8442c8eb8d3..ed6e811a4cc 100644 --- a/core/src/main/scala/org/apache/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -158,18 +158,6 @@ private[spark] class SparkFileAlreadyExistsException( override def getErrorClass: String = errorClass } -/** - * Illegal state exception thrown from Spark with an error class. - */ -private[spark] class SparkIllegalStateException( -errorClass: String, -messageParameters: Array[String]) - extends IllegalStateException( -SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { - - override def getErrorClass: String = errorClass -} - /** * File not found exception thrown from Spark with an error class. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index ff40272682e..f89fbe59af6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils, TypeUtils} import org.apache.spark.sql.connector.catalog.{LookupCatalog, SupportsPartitionManagement} -import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -571,8 +571,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { |in operator ${operator.simpleString(SQLConf.get.maxToStringFields)} """.stripMargin) - case _: UnresolvedHint => -throw QueryExecutionErrors.logicalHintOperatorNotRemovedDuringAnalysisError + case _: UnresolvedHint => throw new IllegalStateException( +"Logical hint operator should be removed during analysis.") case f @ Filter(condition, _) if PlanHelper.specialExpressionsInUnsupportedOperator(f).nonEmpty => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/error
[spark] branch branch-3.3 updated (386c75693b5 -> af38fce62da)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 386c75693b5 [SPARK-39186][PYTHON] Make pandas-on-Spark's skew consistent with pandas add c8c657b922a Preparing Spark release v3.3.0-rc2 new af38fce62da Preparing development version 3.3.1-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org
[spark] 01/01: Preparing development version 3.3.1-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit af38fce62da393ff0b56662be050b46de115a89f Author: Maxim Gekk AuthorDate: Mon May 16 05:42:35 2022 + Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87..0e449e841cf 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b..d12f2ad73fa 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e..842d63f5d38 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d75..f7d187bf952 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e7747..53f38df8851 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e..845f6659407 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370a..8e159089193 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3..1987c133285 100644
[spark] tag v3.3.0-rc2 created (now c8c657b922a)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to tag v3.3.0-rc2 in repository https://gitbox.apache.org/repos/asf/spark.git at c8c657b922a (commit) This tag includes the following new commits: new c8c657b922a Preparing Spark release v3.3.0-rc2 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org
[spark] 01/01: Preparing Spark release v3.3.0-rc2
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to tag v3.3.0-rc2 in repository https://gitbox.apache.org/repos/asf/spark.git commit c8c657b922ac8fd8dcf9553113e11a80079db059 Author: Maxim Gekk AuthorDate: Mon May 16 05:42:28 2022 + Preparing Spark release v3.3.0-rc2 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf..9479bb3bf87 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fa..2e9c4d9960b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d38..2a9acfa335e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf952..7b17e625d75 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df8851..c5c920e7747 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407..697b5a3928e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e159089193..ad2db11370a 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c133285..1a7bdee70f3 100644 --- a/common/tags
[spark] branch master updated: [SPARK-38688][SQL][TESTS] Use error classes in the compilation errors of deserializer
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 66c6e19aad1 [SPARK-38688][SQL][TESTS] Use error classes in the compilation errors of deserializer 66c6e19aad1 is described below commit 66c6e19aad1e42d404b70b7dcddf871f28c3774f Author: panbingkun AuthorDate: Mon May 16 08:31:16 2022 +0300 [SPARK-38688][SQL][TESTS] Use error classes in the compilation errors of deserializer ### What changes were proposed in this pull request? Migrate the following errors in QueryCompilationErrors: * dataTypeMismatchForDeserializerError -> UNSUPPORTED_DESERIALIZER.DATA_TYPE_MISMATCH * fieldNumberMismatchForDeserializerError -> UNSUPPORTED_DESERIALIZER.FIELD_NUMBER_MISMATCH ### Why are the changes needed? Porting compilation errors of unsupported deserializer to new error framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new UT. Closes #36479 from panbingkun/SPARK-38688. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 ++ .../spark/sql/errors/QueryCompilationErrors.scala | 9 +++-- .../apache/spark/sql/errors/QueryErrorsBase.scala | 4 +++ .../catalyst/encoders/EncoderResolutionSuite.scala | 26 -- .../scala/org/apache/spark/sql/DatasetSuite.scala | 18 -- .../sql/errors/QueryCompilationErrorsSuite.scala | 40 +- 6 files changed, 76 insertions(+), 32 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 3a7bc757f73..f401ea8d29a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -200,6 +200,17 @@ "message" : [ "Unsupported data type " ], "sqlState" : "0A000" }, + "UNSUPPORTED_DESERIALIZER" : { +"message" : [ "The deserializer is not supported: " ], +"subClass" : { + "DATA_TYPE_MISMATCH" : { +"message" : [ "need field but got ." ] + }, + "FIELD_NUMBER_MISMATCH" : { +"message" : [ "try to map to Tuple, but failed as the number of fields does not line up." 
] + } +} + }, "UNSUPPORTED_FEATURE" : { "message" : [ "The feature is not supported: " ], "subClass" : { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index efb4389ec50..d803cd23df6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -147,14 +147,17 @@ object QueryCompilationErrors extends QueryErrorsBase { dataType: DataType, desiredType: String): Throwable = { val quantifier = if (desiredType.equals("array")) "an" else "a" new AnalysisException( - s"need $quantifier $desiredType field but got " + dataType.catalogString) + errorClass = "UNSUPPORTED_DESERIALIZER", + messageParameters = +Array("DATA_TYPE_MISMATCH", quantifier, toSQLType(desiredType), toSQLType(dataType))) } def fieldNumberMismatchForDeserializerError( schema: StructType, maxOrdinal: Int): Throwable = { new AnalysisException( - s"Try to map ${schema.catalogString} to Tuple${maxOrdinal + 1}, " + -"but failed as the number of fields does not line up.") + errorClass = "UNSUPPORTED_DESERIALIZER", + messageParameters = +Array("FIELD_NUMBER_MISMATCH", toSQLType(schema), (maxOrdinal + 1).toString)) } def upCastFailureError( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index d51ee13acef..b47b9f12fb1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -60,6 +60,10 @@ trait QueryErrorsBase { quoteByDefault(t.sql) } + def toSQLType(text: String): String = { +quoteByDefault(text.toUpperCase(Locale.ROOT)) + } + def toSQLConf(conf: String): String = { quoteByDefault(conf) } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/sr
[spark] branch master updated: [SPARK-39187][SQL] Remove `SparkIllegalStateException`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 1a90512f605 [SPARK-39187][SQL] Remove `SparkIllegalStateException` 1a90512f605 is described below commit 1a90512f605c490255f7b38215c207e64621475b Author: Max Gekk AuthorDate: Mon May 16 08:24:12 2022 +0300 [SPARK-39187][SQL] Remove `SparkIllegalStateException` ### What changes were proposed in this pull request? Remove `SparkIllegalStateException` and replace it by `IllegalStateException` where it was used. ### Why are the changes needed? To improve code maintenance and be consistent to other places where `IllegalStateException` is used in illegal states (for instance, see https://github.com/apache/spark/pull/36524). After the PR https://github.com/apache/spark/pull/36500, the exception is substituted by `SparkException` w/ the `INTERNAL_ERROR` error class. ### Does this PR introduce _any_ user-facing change? No. Users shouldn't face to the exception in regular cases. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "sql/test:testOnly *QueryExecutionErrorsSuite*" $ build/sbt "test:testOnly *ArrowUtilsSuite" ``` Closes #36550 from MaxGekk/remove-SparkIllegalStateException. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../main/scala/org/apache/spark/SparkException.scala | 12 .../spark/sql/catalyst/analysis/CheckAnalysis.scala| 6 +++--- .../apache/spark/sql/errors/QueryExecutionErrors.scala | 16 +++- .../scala/org/apache/spark/sql/util/ArrowUtils.scala | 9 +++-- .../org/apache/spark/sql/util/ArrowUtilsSuite.scala| 2 +- .../spark/sql/errors/QueryExecutionErrorsSuite.scala | 18 -- 6 files changed, 14 insertions(+), 49 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index a846e6c46a2..4feea6151b9 100644 --- a/core/src/main/scala/org/apache/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -151,18 +151,6 @@ private[spark] class SparkFileAlreadyExistsException( override def getErrorClass: String = errorClass } -/** - * Illegal state exception thrown from Spark with an error class. - */ -private[spark] class SparkIllegalStateException( -errorClass: String, -messageParameters: Array[String]) - extends IllegalStateException( -SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { - - override def getErrorClass: String = errorClass -} - /** * File not found exception thrown from Spark with an error class. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 1e9c431292b..f827e9effe9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils, TypeUtils} import org.apache.spark.sql.connector.catalog.{LookupCatalog, SupportsPartitionManagement} -import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -582,8 +582,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { |in operator ${operator.simpleString(SQLConf.get.maxToStringFields)} """.stripMargin) - case _: UnresolvedHint => -throw QueryExecutionErrors.logicalHintOperatorNotRemovedDuringAnalysisError + case _: UnresolvedHint => throw new IllegalStateException( +"Logical hint operator should be removed during analysis.") case f @ Filter(condition, _) if PlanHelper.specialExpressionsInUnsupportedOperator(f).nonEmpty => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 7ed4fc3574d..b7239d3ff60 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -34,7 +34,7 @@
[spark] branch master updated: [SPARK-38739][SQL][TESTS] Test the error class: INVALID_SYNTAX_FOR_CAST
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 7221ea31b6b [SPARK-38739][SQL][TESTS] Test the error class: INVALID_SYNTAX_FOR_CAST 7221ea31b6b is described below commit 7221ea31b6bbad0d87b22e5413b8979bee56321c Author: panbingkun AuthorDate: Fri May 13 23:20:42 2022 +0300 [SPARK-38739][SQL][TESTS] Test the error class: INVALID_SYNTAX_FOR_CAST ## What changes were proposed in this pull request? This PR aims to add a test for the error class INVALID_SYNTAX_FOR_CAST to `QueryExecutionErrors`. Also the method `invalidInputSyntaxForNumericError` is removed as no longer used. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? By running new test: ``` $ build/sbt "test:testOnly *QueryExecutionAnsiErrorsSuite" ``` Closes #36493 from panbingkun/SPARK-38739. Authored-by: panbingkun Signed-off-by: Max Gekk --- .../apache/spark/sql/errors/QueryExecutionErrors.scala | 9 + .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 17 - 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 447a820a128..e687417d7cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -115,17 +115,10 @@ object QueryExecutionErrors extends QueryErrorsBase { context)) } - def invalidInputSyntaxForNumericError( - e: NumberFormatException, - errorContext: String): NumberFormatException = { -new NumberFormatException(s"${e.getMessage}. 
To return NULL instead, use 'try_cast'. " + - s"If necessary set ${SQLConf.ANSI_ENABLED.key} to false to bypass this error." + errorContext) - } - def invalidInputSyntaxForNumericError( to: DataType, s: UTF8String, - errorContext: String): NumberFormatException = { + errorContext: String): SparkNumberFormatException = { new SparkNumberFormatException(errorClass = "INVALID_SYNTAX_FOR_CAST", messageParameters = Array(toSQLType(to), toSQLValue(s, StringType), SQLConf.ANSI_ENABLED.key, errorContext)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala index 78b78f99ab0..8aef4c6f345 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.errors -import org.apache.spark.{SparkArithmeticException, SparkArrayIndexOutOfBoundsException, SparkConf, SparkDateTimeException, SparkNoSuchElementException} +import org.apache.spark.{SparkArithmeticException, SparkArrayIndexOutOfBoundsException, SparkConf, SparkDateTimeException, SparkNoSuchElementException, SparkNumberFormatException} import org.apache.spark.sql.QueryTest import org.apache.spark.sql.internal.SQLConf @@ -124,4 +124,19 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase |""".stripMargin ) } + + test("INVALID_SYNTAX_FOR_CAST: cast string to double") { +checkErrorClass( + exception = intercept[SparkNumberFormatException] { +sql("select CAST('xe23' AS DOUBLE)").collect() + }, + errorClass = "INVALID_SYNTAX_FOR_CAST", + msg = """Invalid input syntax for type "DOUBLE": 'xe23'. """ + +"""To return NULL instead, use 'try_cast'. If necessary set """ + +"""spark.sql.ansi.enabled to false to bypass this error. 
+ |== SQL(line 1, position 7) == + |select CAST('xe23' AS DOUBLE) + | ^^ + |""".stripMargin) + } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38751][SQL][TESTS] Test the error class: UNRECOGNIZED_SQL_TYPE
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new bbf3a2eafa0 [SPARK-38751][SQL][TESTS] Test the error class: UNRECOGNIZED_SQL_TYPE bbf3a2eafa0 is described below commit bbf3a2eafa004f712799261ef883dcc457a072fd Author: panbingkun AuthorDate: Fri May 13 19:29:02 2022 +0300 [SPARK-38751][SQL][TESTS] Test the error class: UNRECOGNIZED_SQL_TYPE ## What changes were proposed in this pull request? This PR aims to add a test for the error class UNRECOGNIZED_SQL_TYPE to `QueryExecutionErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite*" ``` Closes #36463 from panbingkun/SPARK-38751. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../sql/errors/QueryExecutionErrorsSuite.scala | 89 +- 1 file changed, 86 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 7a5592c148a..cf1551298a8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -19,23 +19,27 @@ package org.apache.spark.sql.errors import java.io.IOException import java.net.URL -import java.util.{Locale, ServiceConfigurationError} +import java.sql.{Connection, DriverManager, PreparedStatement, ResultSet, ResultSetMetaData} +import java.util.{Locale, Properties, ServiceConfigurationError} import org.apache.hadoop.fs.{LocalFileSystem, Path} import org.apache.hadoop.fs.permission.FsPermission +import org.mockito.Mockito.{mock, when} import test.org.apache.spark.sql.connector.JavaSimpleWritableDataSource -import org.apache.spark.{SparkArithmeticException, SparkClassNotFoundException, SparkException, SparkIllegalArgumentException, SparkIllegalStateException, SparkRuntimeException, SparkSecurityException, SparkUnsupportedOperationException, SparkUpgradeException} +import org.apache.spark.{SparkArithmeticException, SparkClassNotFoundException, SparkException, SparkIllegalArgumentException, SparkIllegalStateException, SparkRuntimeException, SparkSecurityException, SparkSQLException, SparkUnsupportedOperationException, SparkUpgradeException} import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, SaveMode} import org.apache.spark.sql.catalyst.util.BadRecordException import org.apache.spark.sql.connector.SimpleWritableDataSource import org.apache.spark.sql.execution.QueryExecutionException +import org.apache.spark.sql.execution.datasources.jdbc.{DriverRegistry, JDBCOptions} import 
org.apache.spark.sql.execution.datasources.orc.OrcTest import org.apache.spark.sql.execution.datasources.parquet.ParquetTest import org.apache.spark.sql.functions.{lit, lower, struct, sum, udf} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy.EXCEPTION -import org.apache.spark.sql.types.{DecimalType, StructType, TimestampType} +import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects} +import org.apache.spark.sql.types.{DataType, DecimalType, MetadataBuilder, StructType, TimestampType} import org.apache.spark.sql.util.ArrowUtils import org.apache.spark.util.Utils @@ -514,6 +518,85 @@ class QueryExecutionErrorsSuite "META-INF/services/org.apache.spark.sql.sources.DataSourceRegister") } } + + test("UNRECOGNIZED_SQL_TYPE: unrecognized SQL type -100") { +Utils.classForName("org.h2.Driver") + +val properties = new Properties() +properties.setProperty("user", "testUser") +properties.setProperty("password", "testPass") + +val url = "jdbc:h2:mem:testdb0" +val urlWithUserAndPass = "jdbc:h2:mem:testdb0;user=testUser;password=testPass" +val tableName = "test.table1" +val unrecognizedColumnType = -100 + +var conn: java.sql.Connection = null +try { + conn = DriverManager.getConnection(url, properties) + conn.prepareStatement("create schema test").executeUpdate() + conn.commit() + + conn.prepareStatement(s"create table $tableName (a INT)").executeUpdate() + conn.prepareStatement( +s"insert into $tableName values (1)").executeUpdate() + conn.commit() +} finally { + if (null != conn) { +
[spark] branch branch-3.3 updated: [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new e743e68ce62 [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack e743e68ce62 is described below commit e743e68ce62e18ced6c49a22f5d101c72b7bfbe2 Author: Angerszh AuthorDate: Fri May 13 16:47:11 2022 +0300 [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack ### What changes were proposed in this pull request? One of our users hit a case when running broadcast where a `SparkFatalException` was thrown, but the error stack did not show the root cause. ### Why are the changes needed? Make the exception clearer. ### Does this PR introduce _any_ user-facing change? Users can now see the root cause when an application throws `SparkFatalException`. ### How was this patch tested? For ut ``` test("") { throw new SparkFatalException( new OutOfMemoryError("Not enough memory to build and broadcast the table to all " + "worker nodes. 
As a workaround, you can either disable broadcast by setting " + s"driver memory by setting ${SparkLauncher.DRIVER_MEMORY} to a higher value.") .initCause(null)) } ``` Before this pr: ``` [info] org.apache.spark.util.SparkFatalException: [info] at org.apache.spark.SparkContextSuite.$anonfun$new$1(SparkContextSuite.scala:59) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:203) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:200) [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:200) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:182) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:64) [info] at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) [info] at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:64) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:233) [info] at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) [info] at scala.collection.immutable.List.foreach(List.scala:431) [info] at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) [info] at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396) [info] at 
org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:233) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:232) [info] at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1563) [info] at org.scalatest.Suite.run(Suite.scala:1112) ``` After this pr: ``` [info] org.apache.spark.util.SparkFatalException: java.lang.OutOfMemoryError: Not enough memory to build and broadcast the table to all worker nodes. As a workaround, you can either disable broadcast by setting driver memory by setting spark.driver.memory to a higher value. [info] at org.apache.spark.SparkContextSuite.$anonfun$new$1(SparkContextSuite.scala:59) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:203) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest
[spark] branch master updated: [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d7317b03e97 [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack d7317b03e97 is described below commit d7317b03e975f8dc1a8c276dd0a931e00c478717 Author: Angerszh AuthorDate: Fri May 13 16:47:11 2022 +0300 [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack ### What changes were proposed in this pull request? One of our users hit a case when running broadcast where a `SparkFatalException` was thrown, but the error stack did not show the root cause. ### Why are the changes needed? Make the exception clearer. ### Does this PR introduce _any_ user-facing change? Users can now see the root cause when an application throws `SparkFatalException`. ### How was this patch tested? For ut ``` test("") { throw new SparkFatalException( new OutOfMemoryError("Not enough memory to build and broadcast the table to all " + "worker nodes. 
As a workaround, you can either disable broadcast by setting " + s"driver memory by setting ${SparkLauncher.DRIVER_MEMORY} to a higher value.") .initCause(null)) } ``` Before this pr: ``` [info] org.apache.spark.util.SparkFatalException: [info] at org.apache.spark.SparkContextSuite.$anonfun$new$1(SparkContextSuite.scala:59) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:203) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:200) [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:200) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:182) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:64) [info] at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) [info] at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:64) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:233) [info] at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) [info] at scala.collection.immutable.List.foreach(List.scala:431) [info] at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) [info] at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396) [info] at 
org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:233) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:232) [info] at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1563) [info] at org.scalatest.Suite.run(Suite.scala:1112) ``` After this pr: ``` [info] org.apache.spark.util.SparkFatalException: java.lang.OutOfMemoryError: Not enough memory to build and broadcast the table to all worker nodes. As a workaround, you can either disable broadcast by setting driver memory by setting spark.driver.memory to a higher value. [info] at org.apache.spark.SparkContextSuite.$anonfun$new$1(SparkContextSuite.scala:59) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:203) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest
[spark] branch branch-3.3 updated: [SPARK-39164][SQL][3.3] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 1372f312052 [SPARK-39164][SQL][3.3] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions 1372f312052 is described below commit 1372f312052dd0361e371e2ed63436f3e299c617 Author: Max Gekk AuthorDate: Fri May 13 16:43:53 2022 +0300 [SPARK-39164][SQL][3.3] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions ### What changes were proposed in this pull request? In the PR, I propose to catch `java.lang.IllegalStateException` and `java.lang.AssertionError` (raised by asserts), and wrap them by Spark's exception w/ the `INTERNAL_ERROR` error class. The modification affects only actions so far. This PR affects the case of missing bucket file. After the changes, Spark throws `SparkException` w/ `INTERNAL_ERROR` instead of `IllegalStateException`. Since this is not Spark's illegal state, the exception should be replaced by another runtime exception. Created the ticket SPARK-39163 to fix this. This is a backport of https://github.com/apache/spark/pull/36500. ### Why are the changes needed? To improve user experience with Spark SQL and unify representation of internal errors by using error classes like for other errors. Usually, users shouldn't observe asserts and illegal states, but even if such situation happens, they should see errors in the same way as other errors (w/ error class `INTERNAL_ERROR`). ### Does this PR introduce _any_ user-facing change? Yes. At least, in one particular case, see the modified test suites and SPARK-39163. ### How was this patch tested? 
By running the affected test suites: ``` $ build/sbt "test:testOnly *.BucketedReadWithoutHiveSupportSuite" $ build/sbt "test:testOnly *.AdaptiveQueryExecSuite" $ build/sbt "test:testOnly *.WholeStageCodegenSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit f5c3f0c228fef7808d1f927e134595ddd4d31723) Signed-off-by: Max Gekk Closes #36533 from MaxGekk/class-internal-error-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../main/scala/org/apache/spark/sql/Dataset.scala | 21 - .../spark/sql/execution/DataSourceScanExec.scala| 1 + .../org/apache/spark/sql/execution/subquery.scala | 1 + .../scala/org/apache/spark/sql/SubquerySuite.scala | 10 ++ .../sql/execution/WholeStageCodegenSuite.scala | 14 -- .../execution/adaptive/AdaptiveQueryExecSuite.scala | 9 ++--- .../spark/sql/sources/BucketedReadSuite.scala | 8 +--- 7 files changed, 43 insertions(+), 21 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 7d16a2f5eee..56f0e8978ec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils -import org.apache.spark.TaskContext +import org.apache.spark.{SparkException, SparkThrowable, TaskContext} import org.apache.spark.annotation.{DeveloperApi, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.function._ @@ -3848,12 +3848,23 @@ class Dataset[T] private[sql]( /** * Wrap a Dataset action to track the QueryExecution and time cost, then report to the - * user-registered callback functions. + * user-registered callback functions, and also to convert asserts/illegal states to + * the internal error exception. 
*/ private def withAction[U](name: String, qe: QueryExecution)(action: SparkPlan => U) = { -SQLExecution.withNewExecutionId(qe, Some(name)) { - qe.executedPlan.resetMetrics() - action(qe.executedPlan) +try { + SQLExecution.withNewExecutionId(qe, Some(name)) { +qe.executedPlan.resetMetrics() +action(qe.executedPlan) + } +} catch { + case e: SparkThrowable => throw e + case e @ (_: java.lang.IllegalStateException | _: java.lang.AssertionError) => +throw new SparkException( + errorClass = "INTERNAL_ERROR", + messageParameters = Array(s"""The "$name" action failed."""), + cause = e) + case e: Throwable => throw e } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index ac0f3af5725..1ec93a614b7 100644 ---
[spark] branch branch-3.3 updated: [SPARK-39165][SQL][3.3] Replace `sys.error` by `IllegalStateException`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new c2bd7bac76a [SPARK-39165][SQL][3.3] Replace `sys.error` by `IllegalStateException` c2bd7bac76a is described below commit c2bd7bac76a5cf7ffc5ef61a1df2b8bb5a72f131 Author: Max Gekk AuthorDate: Fri May 13 12:47:53 2022 +0300 [SPARK-39165][SQL][3.3] Replace `sys.error` by `IllegalStateException` ### What changes were proposed in this pull request? Replace all invocations of `sys.error()` with throwing `IllegalStateException` in the `sql` namespace. This is a backport of https://github.com/apache/spark/pull/36524. ### Why are the changes needed? In the context of wrapping all internal errors like asserts/illegal state exceptions (see https://github.com/apache/spark/pull/36500), it is impossible to distinguish the `RuntimeException` of `sys.error()` from Spark's exceptions like `SparkRuntimeException`. The latter can be propagated to the user space, but `sys.error` exceptions shouldn't be visible to users in regular cases. ### Does this PR introduce _any_ user-facing change? No, it shouldn't. `sys.error` exceptions shouldn't propagate to user space in regular cases. ### How was this patch tested? By running the existing test suites. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 95c7efd7571464d8adfb76fb22e47a5816cf73fb) Signed-off-by: Max Gekk Closes #36532 from MaxGekk/sys_error-internal-3.3. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../scala/org/apache/spark/sql/execution/SparkStrategies.scala| 4 ++-- .../org/apache/spark/sql/execution/datasources/DataSource.scala | 8 .../sql/execution/datasources/parquet/ParquetWriteSupport.scala | 3 +-- .../apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala | 4 ++-- .../org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala | 5 +++-- .../scala/org/apache/spark/sql/execution/streaming/memory.scala | 3 ++- .../execution/streaming/sources/TextSocketMicroBatchStream.scala | 3 ++- .../src/main/scala/org/apache/spark/sql/execution/subquery.scala | 3 ++- .../apache/spark/sql/execution/window/AggregateProcessor.scala| 2 +- .../org/apache/spark/sql/execution/window/WindowExecBase.scala| 8 .../src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 3 ++- .../scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala | 2 +- 12 files changed, 26 insertions(+), 22 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 3b8a70ffe94..17f3cfbda89 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -503,8 +503,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { _.aggregateFunction.children.filterNot(_.foldable).toSet).distinct.length > 1) { // This is a sanity check. We should not reach here when we have multiple distinct // column sets. Our `RewriteDistinctAggregates` should take care this case. - sys.error("You hit a query analyzer bug. Please report your query to " + - "Spark user mailing list.") + throw new IllegalStateException( +"You hit a query analyzer bug. 
Please report your query to Spark user mailing list.") } // Ideally this should be done in `NormalizeFloatingNumbers`, but we do it here because diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 2bb3d48c145..143fb4cf960 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -539,8 +539,8 @@ case class DataSource( DataWritingCommand.propogateMetrics(sparkSession.sparkContext, resolved, metrics) // Replace the schema with that of the DataFrame we just wrote out to avoid re-inferring copy(userSpecifiedSchema = Some(outputColumns.toStructType.asNullable)).resolveRelation() - case _ => -sys.error(s"${providingClass.getCanonicalName} does not allow create table as select.") + case _ => throw new IllegalStateException( +s"${providingClass.getCanonicalName} does not allow create table as select.") } } @@ -556,8 +556,8 @@ case class DataSource( dis
[spark] branch master updated: [SPARK-39164][SQL] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f5c3f0c228f [SPARK-39164][SQL] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions f5c3f0c228f is described below commit f5c3f0c228fef7808d1f927e134595ddd4d31723 Author: Max Gekk AuthorDate: Thu May 12 23:54:19 2022 +0300 [SPARK-39164][SQL] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions ### What changes were proposed in this pull request? In the PR, I propose to catch `java.lang.IllegalStateException` and `java.lang.AssertionError` (raised by asserts), and wrap them by Spark's exception w/ the `INTERNAL_ERROR` error class. The modification affects only actions so far. This PR affects the case of missing bucket file. After the changes, Spark throws `SparkException` w/ `INTERNAL_ERROR` instead of `IllegalStateException`. Since this is not Spark's illegal state, the exception should be replaced by another runtime exception. Created the ticket SPARK-39163 to fix this. ### Why are the changes needed? To improve user experience with Spark SQL and unify representation of internal errors by using error classes like for other errors. Usually, users shouldn't observe asserts and illegal states, but even if such situation happens, they should see errors in the same way as other errors (w/ error class `INTERNAL_ERROR`). ### Does this PR introduce _any_ user-facing change? Yes. At least, in one particular case, see the modified test suites and SPARK-39163. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *.BucketedReadWithoutHiveSupportSuite" $ build/sbt "test:testOnly *.AdaptiveQueryExecSuite" $ build/sbt "test:testOnly *.WholeStageCodegenSuite" ``` Closes #36500 from MaxGekk/class-internal-error. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../main/scala/org/apache/spark/sql/Dataset.scala | 21 - .../spark/sql/execution/DataSourceScanExec.scala| 1 + .../org/apache/spark/sql/execution/subquery.scala | 1 + .../scala/org/apache/spark/sql/SubquerySuite.scala | 10 ++ .../sql/execution/WholeStageCodegenSuite.scala | 14 -- .../execution/adaptive/AdaptiveQueryExecSuite.scala | 9 ++--- .../spark/sql/sources/BucketedReadSuite.scala | 8 +--- 7 files changed, 43 insertions(+), 21 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 36b6d6b470d..8c89ec795de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils -import org.apache.spark.TaskContext +import org.apache.spark.{SparkException, SparkThrowable, TaskContext} import org.apache.spark.annotation.{DeveloperApi, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.function._ @@ -3906,12 +3906,23 @@ class Dataset[T] private[sql]( /** * Wrap a Dataset action to track the QueryExecution and time cost, then report to the - * user-registered callback functions. + * user-registered callback functions, and also to convert asserts/illegal states to + * the internal error exception. 
*/ private def withAction[U](name: String, qe: QueryExecution)(action: SparkPlan => U) = { -SQLExecution.withNewExecutionId(qe, Some(name)) { - qe.executedPlan.resetMetrics() - action(qe.executedPlan) +try { + SQLExecution.withNewExecutionId(qe, Some(name)) { +qe.executedPlan.resetMetrics() +action(qe.executedPlan) + } +} catch { + case e: SparkThrowable => throw e + case e @ (_: java.lang.IllegalStateException | _: java.lang.AssertionError) => +throw new SparkException( + errorClass = "INTERNAL_ERROR", + messageParameters = Array(s"""The "$name" action failed."""), + cause = e) + case e: Throwable => throw e } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 9141a3f742e..f7b627cef08 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -618,6 +618,7 @@ case class FileSourceScanExec( }.groupBy { f =>
[spark] branch master updated (c74506cc33b -> 95c7efd7571)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from c74506cc33b [SPARK-39086][SQL] Support UDT in Spark Parquet vectorized reader add 95c7efd7571 [SPARK-39165][SQL] Replace `sys.error` by `IllegalStateException` No new revisions were added by this update. Summary of changes: .../scala/org/apache/spark/sql/execution/SparkStrategies.scala| 4 ++-- .../org/apache/spark/sql/execution/datasources/DataSource.scala | 8 .../sql/execution/datasources/parquet/ParquetWriteSupport.scala | 3 +-- .../apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala | 4 ++-- .../org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala | 5 +++-- .../scala/org/apache/spark/sql/execution/streaming/memory.scala | 3 ++- .../execution/streaming/sources/TextSocketMicroBatchStream.scala | 3 ++- .../src/main/scala/org/apache/spark/sql/execution/subquery.scala | 3 ++- .../apache/spark/sql/execution/window/AggregateProcessor.scala| 2 +- .../org/apache/spark/sql/execution/window/WindowExecBase.scala| 8 .../src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 3 ++- .../scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala | 2 +- 12 files changed, 26 insertions(+), 22 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.2 updated: [SPARK-39060][SQL][3.2] Typo in error messages of decimal overflow
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new 6f9e3034ada [SPARK-39060][SQL][3.2] Typo in error messages of decimal overflow 6f9e3034ada is described below commit 6f9e3034ada72f372dafe93152e01ad5cb323989 Author: Vitalii Li AuthorDate: Thu May 12 08:13:51 2022 +0300 [SPARK-39060][SQL][3.2] Typo in error messages of decimal overflow ### What changes were proposed in this pull request? This PR removes extra curly bracket from debug string for Decimal type in SQL. This is a backport from master branch. Commit: https://github.com/apache/spark/commit/165ce4eb7d6d75201beb1bff879efa99fde24f94 ### Why are the changes needed? Typo in error messages of decimal overflow. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running tests: ``` $ build/sbt "sql/testOnly" ``` Closes #36458 from vli-databricks/SPARK-39060-3.2. 
Authored-by: Vitalii Li Signed-off-by: Max Gekk --- .../src/main/scala/org/apache/spark/sql/types/Decimal.scala | 4 ++-- .../sql-tests/results/ansi/decimalArithmeticOperations.sql.out| 8 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 46814297231..bc5fba8d0d8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -227,9 +227,9 @@ final class Decimal extends Ordered[Decimal] with Serializable { def toDebugString: String = { if (decimalVal.ne(null)) { - s"Decimal(expanded,$decimalVal,$precision,$scale})" + s"Decimal(expanded, $decimalVal, $precision, $scale)" } else { - s"Decimal(compact,$longVal,$precision,$scale})" + s"Decimal(compact, $longVal, $precision, $scale)" } } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out index 2f3513e734f..c65742e4d8b 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out @@ -76,7 +76,7 @@ select (5e36BD + 0.1) + 5e36BD struct<> -- !query output java.lang.ArithmeticException -Decimal(expanded,10.1,39,1}) cannot be represented as Decimal(38, 1). +Decimal(expanded, 10.1, 39, 1) cannot be represented as Decimal(38, 1). -- !query @@ -85,7 +85,7 @@ select (-4e36BD - 0.1) - 7e36BD struct<> -- !query output java.lang.ArithmeticException -Decimal(expanded,-11.1,39,1}) cannot be represented as Decimal(38, 1). +Decimal(expanded, -11.1, 39, 1) cannot be represented as Decimal(38, 1). 
-- !query @@ -94,7 +94,7 @@ select 12345678901234567890.0 * 12345678901234567890.0 struct<> -- !query output java.lang.ArithmeticException -Decimal(expanded,152415787532388367501905199875019052100,39,0}) cannot be represented as Decimal(38, 2). +Decimal(expanded, 152415787532388367501905199875019052100, 39, 0) cannot be represented as Decimal(38, 2). -- !query @@ -103,7 +103,7 @@ select 1e35BD / 0.1 struct<> -- !query output java.lang.ArithmeticException -Decimal(expanded,1,37,0}) cannot be represented as Decimal(38, 6). +Decimal(expanded, 1, 37, 0) cannot be represented as Decimal(38, 6). -- !query - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 6378365011c [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc 6378365011c is described below commit 6378365011c590d7e7225ea05728bfe06490e769 Author: Yikun Jiang AuthorDate: Sat May 7 10:19:53 2022 +0300 [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc ### What changes were proposed in this pull request? Fix format error on running-on-kubernetes doc ### Why are the changes needed? Fix format syntax error ### Does this PR introduce _any_ user-facing change? No, unreleased doc only ### How was this patch tested? - `SKIP_API=1 bundle exec jekyll serve --watch` - CI passed Closes #36476 from Yikun/SPARK-39121. Authored-by: Yikun Jiang Signed-off-by: Max Gekk (cherry picked from commit 2349f74866ae1b365b5e4e0ec8a58c4f7f06885c) Signed-off-by: Max Gekk --- docs/running-on-kubernetes.md | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 763a9668d3b..ee77e37beb3 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1699,7 +1699,7 @@ Kubernetes supports [Pod priority](https://kubernetes.io/docs/concepts/schedulin Spark on Kubernetes allows defining the priority of jobs by [Pod template](#pod-template). The user can specify the priorityClassName in driver or executor Pod template spec section. Below is an example to show how to specify it: -``` +```yaml apiVersion: v1 Kind: Pod metadata: @@ -1729,8 +1729,8 @@ Spark allows users to specify a custom Kubernetes schedulers. 3. Specify scheduler feature step. 
Users may also consider to use spark.kubernetes.{driver/executor}.pod.featureSteps to support more complex requirements, including but not limited to: - - Create additional Kubernetes custom resources for driver/executor scheduling. - - Set scheduler hints according to configuration or existing Pod info dynamically. + - Create additional Kubernetes custom resources for driver/executor scheduling. + - Set scheduler hints according to configuration or existing Pod info dynamically. Using Volcano as Customized Scheduler for Spark on Kubernetes @@ -1766,7 +1766,7 @@ To use Volcano as a custom scheduler the user needs to specify the following con --conf spark.kubernetes.scheduler.volcano.podGroupTemplateFile=/path/to/podgroup-template.yaml # Specify driver/executor VolcanoFeatureStep --conf spark.kubernetes.driver.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep ---conf spark.kubernetes.executor.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep``` +--conf spark.kubernetes.executor.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep ``` # Volcano Feature Step - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 2349f74866a [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc 2349f74866a is described below commit 2349f74866ae1b365b5e4e0ec8a58c4f7f06885c Author: Yikun Jiang AuthorDate: Sat May 7 10:19:53 2022 +0300 [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc ### What changes were proposed in this pull request? Fix format error on running-on-kubernetes doc ### Why are the changes needed? Fix format syntax error ### Does this PR introduce _any_ user-facing change? No, unreleased doc only ### How was this patch tested? - `SKIP_API=1 bundle exec jekyll serve --watch` - CI passed Closes #36476 from Yikun/SPARK-39121. Authored-by: Yikun Jiang Signed-off-by: Max Gekk --- docs/running-on-kubernetes.md | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index f5f2465fb06..c8c202360f8 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1699,7 +1699,7 @@ Kubernetes supports [Pod priority](https://kubernetes.io/docs/concepts/schedulin Spark on Kubernetes allows defining the priority of jobs by [Pod template](#pod-template). The user can specify the priorityClassName in driver or executor Pod template spec section. Below is an example to show how to specify it: -``` +```yaml apiVersion: v1 Kind: Pod metadata: @@ -1729,8 +1729,8 @@ Spark allows users to specify a custom Kubernetes schedulers. 3. Specify scheduler feature step. Users may also consider to use spark.kubernetes.{driver/executor}.pod.featureSteps to support more complex requirements, including but not limited to: - - Create additional Kubernetes custom resources for driver/executor scheduling. 
- - Set scheduler hints according to configuration or existing Pod info dynamically. + - Create additional Kubernetes custom resources for driver/executor scheduling. + - Set scheduler hints according to configuration or existing Pod info dynamically. Using Volcano as Customized Scheduler for Spark on Kubernetes @@ -1766,7 +1766,7 @@ To use Volcano as a custom scheduler the user needs to specify the following con --conf spark.kubernetes.scheduler.volcano.podGroupTemplateFile=/path/to/podgroup-template.yaml # Specify driver/executor VolcanoFeatureStep --conf spark.kubernetes.driver.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep ---conf spark.kubernetes.executor.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep``` +--conf spark.kubernetes.executor.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep ``` # Volcano Feature Step - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39117][SQL][TESTS] Do not include number of functions in sql-expression-schema.md
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 986b0f769b8 [SPARK-39117][SQL][TESTS] Do not include number of functions in sql-expression-schema.md 986b0f769b8 is described below commit 986b0f769b8ffa8a033d0f182217e83faa38fb4a Author: Wenchen Fan AuthorDate: Fri May 6 20:43:36 2022 +0300 [SPARK-39117][SQL][TESTS] Do not include number of functions in sql-expression-schema.md ### What changes were proposed in this pull request? `sql-expression-schema.md` is a golden file for tracking purposes: whenever we change a function or add a new function, this file must be updated. However, the number of functions in this file is not very useful and stops people from adding functions at the same time. This PR prints the summary information during test instead of putting it in the golden file. ### Why are the changes needed? Increase development velocity. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? N/A Closes #36472 from cloud-fan/small. 
Authored-by: Wenchen Fan Signed-off-by: Max Gekk --- .../sql-functions/sql-expression-schema.md | 4 -- .../apache/spark/sql/ExpressionsSchemaSuite.scala | 48 +- 2 files changed, 11 insertions(+), 41 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index accf9ea4577..0115578e909 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,8 +1,4 @@ -## Summary - - Number of queries: 390 - - Number of expressions that missing example: 12 - - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions | Class name | Function name or alias | Query example | Output schema | | -- | -- | - | - | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index f8071e6cda1..d6ef90ce0b7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -133,10 +133,6 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { val header = Seq( s"", - "## Summary", - s" - Number of queries: ${outputs.size}", - s" - Number of expressions that missing example: ${missingExamples.size}", - s" - Expressions missing examples: ${missingExamples.mkString(",")}", "## Schema of Built-in Functions", "| Class name | Function name or alias | Query example | Output schema |", "| -- | -- | - | - |" @@ -149,11 +145,20 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { assert(parent.mkdirs(), "Could not create directory: " + parent) } stringToFile(resultFile, goldenOutput) + // scalastyle:off println + println( +s""" + |## Summary + | - Number of queries: 
${outputs.size} + | - Number of expressions that missing example: ${missingExamples.size} + | - Expressions missing examples: ${missingExamples.mkString(",")} + |""".stripMargin) + // scalastyle:on println } val outputSize = outputs.size val headerSize = header.size -val (expectedMissingExamples, expectedOutputs) = { +val expectedOutputs = { val expectedGoldenOutput = fileToString(resultFile) val lines = expectedGoldenOutput.split("\n") val expectedSize = lines.size @@ -162,8 +167,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { s"Expected $expectedSize blocks in result file but got " + s"${outputSize + headerSize}. Try regenerating the result files.") - val numberOfQueries = lines(2).split(":")(1).trim.toInt - val expectedOutputs = Seq.tabulate(outputSize) { i => + Seq.tabulate(outputSize) { i => val segments = lines(i + headerSize).split('|') QueryOutput( className = segments(1).trim, @@ -171,28 +175,6 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { sql = segments(3).trim, s
[spark] branch branch-3.0 updated (4e38563d39c -> 19942e7be86)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git from 4e38563d39c [SPARK-38918][SQL][3.0] Nested column pruning should filter out attributes that do not belong to the current relation add 19942e7be86 [SPARK-39060][SQL][3.0] Typo in error messages of decimal overflow No new revisions were added by this update. Summary of changes: .../src/main/scala/org/apache/spark/sql/types/Decimal.scala | 4 ++-- .../sql-tests/results/ansi/decimalArithmeticOperations.sql.out| 8 .../src/test/resources/sql-tests/results/ansi/interval.sql.out| 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.1 updated (8f6a3a50b4b -> 19576c412b7)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git from 8f6a3a50b4b [SPARK-39084][PYSPARK] Fix df.rdd.isEmpty() by using TaskContext to stop iterator on task completion add 19576c412b7 [SPARK-39060][SQL][3.1] Typo in error messages of decimal overflow No new revisions were added by this update. Summary of changes: .../src/main/scala/org/apache/spark/sql/types/Decimal.scala | 4 ++-- .../sql-tests/results/ansi/decimalArithmeticOperations.sql.out| 8 .../src/test/resources/sql-tests/results/ansi/interval.sql.out| 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39108][SQL] Show hints for try_add/try_subtract/try_multiply in int/long overflow errors
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c274812284a [SPARK-39108][SQL] Show hints for try_add/try_substract/try_multiply in int/long overflow errors c274812284a is described below commit c274812284a3b7ec725e6b8afc2e7ab0f91b923e Author: Gengliang Wang AuthorDate: Thu May 5 23:03:44 2022 +0300 [SPARK-39108][SQL] Show hints for try_add/try_substract/try_multiply in int/long overflow errors ### What changes were proposed in this pull request? Show hints for try_add/try_substract/try_multiply in int/long overflow errors ### Why are the changes needed? Better error message for resolving the overflow errors under ANSI mode. ### Does this PR introduce _any_ user-facing change? No, minor error message improvement ### How was this patch tested? UT Closes #36456 from gengliangwang/tryHint. Authored-by: Gengliang Wang Signed-off-by: Max Gekk --- .../scala/org/apache/spark/sql/catalyst/util/MathUtils.scala | 12 ++-- .../test/resources/sql-tests/results/postgreSQL/int4.sql.out | 12 ++-- .../test/resources/sql-tests/results/postgreSQL/int8.sql.out | 8 .../sql-tests/results/postgreSQL/window_part2.sql.out| 4 ++-- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MathUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MathUtils.scala index f96c9fba5a3..e5c87a41ea8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MathUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MathUtils.scala @@ -27,32 +27,32 @@ object MathUtils { def addExact(a: Int, b: Int): Int = withOverflow(Math.addExact(a, b)) def addExact(a: Int, b: Int, errorContext: String): Int = -withOverflow(Math.addExact(a, b), errorContext = errorContext) 
+withOverflow(Math.addExact(a, b), hint = "try_add", errorContext = errorContext) def addExact(a: Long, b: Long): Long = withOverflow(Math.addExact(a, b)) def addExact(a: Long, b: Long, errorContext: String): Long = -withOverflow(Math.addExact(a, b), errorContext = errorContext) +withOverflow(Math.addExact(a, b), hint = "try_add", errorContext = errorContext) def subtractExact(a: Int, b: Int): Int = withOverflow(Math.subtractExact(a, b)) def subtractExact(a: Int, b: Int, errorContext: String): Int = -withOverflow(Math.subtractExact(a, b), errorContext = errorContext) +withOverflow(Math.subtractExact(a, b), hint = "try_subtract", errorContext = errorContext) def subtractExact(a: Long, b: Long): Long = withOverflow(Math.subtractExact(a, b)) def subtractExact(a: Long, b: Long, errorContext: String): Long = -withOverflow(Math.subtractExact(a, b), errorContext = errorContext) +withOverflow(Math.subtractExact(a, b), hint = "try_subtract", errorContext = errorContext) def multiplyExact(a: Int, b: Int): Int = withOverflow(Math.multiplyExact(a, b)) def multiplyExact(a: Int, b: Int, errorContext: String): Int = -withOverflow(Math.multiplyExact(a, b), errorContext = errorContext) +withOverflow(Math.multiplyExact(a, b), hint = "try_multiply", errorContext = errorContext) def multiplyExact(a: Long, b: Long): Long = withOverflow(Math.multiplyExact(a, b)) def multiplyExact(a: Long, b: Long, errorContext: String): Long = -withOverflow(Math.multiplyExact(a, b), errorContext = errorContext) +withOverflow(Math.multiplyExact(a, b), hint = "try_multiply", errorContext = errorContext) def negateExact(a: Int): Int = withOverflow(Math.negateExact(a)) diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out index 6b42e31340f..a39cdbc340c 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out 
@@ -200,7 +200,7 @@ SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT4_TBL i struct<> -- !query output org.apache.spark.SparkArithmeticException -[ARITHMETIC_OVERFLOW] integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +[ARITHMETIC_OVERFLOW] integer overflow. To return NULL instead, use 'try_multiply'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT4_TBL i
[spark] branch master updated: [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 4b1c2fb7a27 [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases 4b1c2fb7a27 is described below commit 4b1c2fb7a27757ebf470416c8ec02bb5c1f7fa49 Author: Max Gekk AuthorDate: Thu May 5 20:10:06 2022 +0300 [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases ### What changes were proposed in this pull request? Add missed dependencies to `dev/create-release/spark-rm/Dockerfile`. ### Why are the changes needed? To be able to build Spark releases. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By building the Spark 3.3 release via: ``` $ dev/create-release/do-release-docker.sh -d /home/ubuntu/max/spark-3.3-rc1 ``` Closes #36449 from MaxGekk/deps-Dockerfile. Authored-by: Max Gekk Signed-off-by: Max Gekk --- dev/create-release/spark-rm/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index ffd60c07af0..c6555e0463d 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -42,7 +42,7 @@ ARG APT_INSTALL="apt-get install --no-install-recommends -y" # We should use the latest Sphinx version once this is fixed. # TODO(SPARK-35375): Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. 
-ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3 twine==3.4.1 sphinx-plotly-directive==0.1.3 pandas==1.1.5 pyarrow==3.0.0 plotly==5.4.0" +ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3 twine==3.4.1 sphinx-plotly-directive==0.1.3 pandas==1.1.5 pyarrow==3.0.0 plotly==5.4.0 markupsafe==2.0.1 docutils<0.17" ARG GEM_PKGS="bundler:2.2.9" # Install extra needed repos and refresh. @@ -79,9 +79,9 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ # Note that PySpark doc generation also needs pandoc due to nbsphinx $APT_INSTALL r-base r-base-dev && \ $APT_INSTALL libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev && \ - $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf && \ + $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf texlive-latex-extra && \ $APT_INSTALL libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev && \ - Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ + Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ Rscript -e "devtools::install_github('jimhester/lintr')" && \ Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" && \ Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" && \ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 6a61f95a359 [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases 6a61f95a359 is described below commit 6a61f95a359e6aa9d09f8044019074dc7effcf30 Author: Max Gekk AuthorDate: Thu May 5 20:10:06 2022 +0300 [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases ### What changes were proposed in this pull request? Add missed dependencies to `dev/create-release/spark-rm/Dockerfile`. ### Why are the changes needed? To be able to build Spark releases. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By building the Spark 3.3 release via: ``` $ dev/create-release/do-release-docker.sh -d /home/ubuntu/max/spark-3.3-rc1 ``` Closes #36449 from MaxGekk/deps-Dockerfile. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 4b1c2fb7a27757ebf470416c8ec02bb5c1f7fa49) Signed-off-by: Max Gekk --- dev/create-release/spark-rm/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index ffd60c07af0..c6555e0463d 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -42,7 +42,7 @@ ARG APT_INSTALL="apt-get install --no-install-recommends -y" # We should use the latest Sphinx version once this is fixed. # TODO(SPARK-35375): Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. 
-ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3 twine==3.4.1 sphinx-plotly-directive==0.1.3 pandas==1.1.5 pyarrow==3.0.0 plotly==5.4.0" +ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3 twine==3.4.1 sphinx-plotly-directive==0.1.3 pandas==1.1.5 pyarrow==3.0.0 plotly==5.4.0 markupsafe==2.0.1 docutils<0.17" ARG GEM_PKGS="bundler:2.2.9" # Install extra needed repos and refresh. @@ -79,9 +79,9 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ # Note that PySpark doc generation also needs pandoc due to nbsphinx $APT_INSTALL r-base r-base-dev && \ $APT_INSTALL libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev && \ - $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf && \ + $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf texlive-latex-extra && \ $APT_INSTALL libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev && \ - Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ + Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ Rscript -e "devtools::install_github('jimhester/lintr')" && \ Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" && \ Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" && \ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR] Remove unused import
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new bf447046327 [MINOR] Remove unused import bf447046327 is described below commit bf447046327b80f176fd638db418d0513b9c2516 Author: panbingkun AuthorDate: Thu May 5 19:25:32 2022 +0300 [MINOR] Remove unused import ### What changes were proposed in this pull request? Remove unused import in `numerics`. ### Why are the changes needed? Cleanup ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #36454 from panbingkun/minor. Authored-by: panbingkun Signed-off-by: Max Gekk --- sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala index fea792f08d0..c3d893d82fc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.types import scala.math.Numeric._ -import scala.math.Ordering import org.apache.spark.sql.catalyst.util.{MathUtils, SQLOrderingUtil} import org.apache.spark.sql.errors.QueryExecutionErrors - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-37938][SQL][TESTS] Use error classes in the parsing errors of partitions
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 29ff671933e [SPARK-37938][SQL][TESTS] Use error classes in the parsing errors of partitions 29ff671933e is described below commit 29ff671933e3b432e69a26761bc79856f21b82c7 Author: panbingkun AuthorDate: Thu May 5 19:22:28 2022 +0300 [SPARK-37938][SQL][TESTS] Use error classes in the parsing errors of partitions ## What changes were proposed in this pull request? Migrate the following errors in QueryParsingErrors onto use error classes: - emptyPartitionKeyError => INVALID_SQL_SYNTAX - partitionTransformNotExpectedError => INVALID_SQL_SYNTAX - descColumnForPartitionUnsupportedError => UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_PARTITION - incompletePartitionSpecificationError => INVALID_SQL_SYNTAX ### Why are the changes needed? Porting parsing errors of partitions to new error framework, improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryParsingErrorsSuite*" ``` Closes #36416 from panbingkun/SPARK-37938. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 3 ++ .../spark/sql/errors/QueryParsingErrors.scala | 22 ++-- .../spark/sql/catalyst/parser/DDLParserSuite.scala | 2 +- .../resources/sql-tests/results/describe.sql.out | 2 +- .../spark/sql/errors/QueryErrorsSuiteBase.scala| 16 -- .../spark/sql/errors/QueryParsingErrorsSuite.scala | 60 ++ .../command/ShowPartitionsParserSuite.scala| 22 +--- .../command/TruncateTableParserSuite.scala | 21 +--- 8 files changed, 125 insertions(+), 23 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 24b50c4209a..3a7bc757f73 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -206,6 +206,9 @@ "AES_MODE" : { "message" : [ "AES- with the padding by the function." ] }, + "DESC_TABLE_COLUMN_PARTITION" : { +"message" : [ "DESC TABLE COLUMN for a specific partition." ] + }, "DISTRIBUTE_BY" : { "message" : [ "DISTRIBUTE BY clause." 
] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index ed5773f4f82..1d15557c9d0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -77,7 +77,11 @@ object QueryParsingErrors extends QueryErrorsBase { } def emptyPartitionKeyError(key: String, ctx: PartitionSpecContext): Throwable = { -new ParseException(s"Found an empty partition key '$key'.", ctx) +new ParseException( + errorClass = "INVALID_SQL_SYNTAX", + messageParameters = +Array(s"Partition key ${toSQLId(key)} must set value (can't be empty)."), + ctx) } def combinationQueryResultClausesUnsupportedError(ctx: QueryOrganizationContext): Throwable = { @@ -243,7 +247,11 @@ object QueryParsingErrors extends QueryErrorsBase { def partitionTransformNotExpectedError( name: String, describe: String, ctx: ApplyTransformContext): Throwable = { -new ParseException(s"Expected a column reference for transform $name: $describe", ctx) +new ParseException( + errorClass = "INVALID_SQL_SYNTAX", + messageParameters = +Array(s"Expected a column reference for transform ${toSQLId(name)}: $describe"), + ctx) } def tooManyArgumentsForTransformError(name: String, ctx: ApplyTransformContext): Throwable = { @@ -298,12 +306,18 @@ object QueryParsingErrors extends QueryErrorsBase { } def descColumnForPartitionUnsupportedError(ctx: DescribeRelationContext): Throwable = { -new ParseException("DESC TABLE COLUMN for a specific partition is not supported", ctx) +new ParseException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array("DESC_TABLE_COLUMN_PARTITION"), + ctx) } def incompletePartitionSpecificationError( key: String, ctx: DescribeRelationContext): Throwable = { -new ParseException(s"PARTITION specifi
svn commit: r54275 - in /dev/spark/v3.3.0-rc1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.0/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: maxgekk Date: Thu May 5 08:51:39 2022 New Revision: 54275 Log: Apache Spark v3.3.0-rc1 docs [This commit notification would consist of 2649 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54273 - /dev/spark/v3.3.0-rc1-bin/
Author: maxgekk Date: Thu May 5 08:17:05 2022 New Revision: 54273 Log: Apache Spark v3.3.0-rc1 Added: dev/spark/v3.3.0-rc1-bin/ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc Thu May 5 08:17:05 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmJzh6QTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q07HcEACkCSXRG7LXd0+/jBU49syIUIpOsUrN +bgbq90ifbo6eCidbhj4wJl5OZO7tKCsV2IrbQYRHVP0Lq7GTCw1Fg4/mY4QiLkhi +RWDizZrKrr9CbHXVFo7ZTlIiaxjnTOcIxauKRtu6rbIJdfIzZyRZwhAYerdK6WOx +atrcWfrY/MhKW/v6/25b8R4SWpLssNXaGj5RRqhs/cn/Kjwus8WkBDzQIibcE2ac +TJA+agMH2fkyC1sUaZOVEo1E68nUBV/vv5GyEtctjnESGDsh90/d+6X8L2cmME9H +YGUO91cT1byN3LCR0FDqMSTea8yh3HsdTQ4Ly+s1Ia7h5UCwnDlpFXTyHsHX9sv7 +osXKz4b1ejogjxHlCiPpFgZ+P3gNa31mpJWmOwMLE49Cgxcn7DdZUXTZaAwZmwhH +YURgYtpqrG+4oKpAOLGR+wx+2ZGv0a0QeLd4iTUEhxhiPFRw9QkNG5VUmHgz237b +ZJzz9Ef0wLbaS5F6ZySk0FBqHTPgCsPZS3ZtmdU76zg37mNPej2xotLrLon2TXhN +TJkcLI8azbRoqcrNSOWKjBWYbLJ3nG4bDNqEkqdi/QApiisnneuXX89w152SI8vF +/GoyJK0xs6rjCsUURXWUZ/kzeVQHxtXfBNLk967+TSOHVDaKFehhS0hJbRNUP0jp +O+gTjMZQfQh+Uw== +=saiU +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 Thu May 5 08:17:05 2022 @@ -0,0 +1,3 @@ +SparkR_3.3.0.tar.gz: 98A2665A 04513C1A BE26952E 7396E3B7 AF63715B B6CCFAF3 + CD8C04EC A9F2374F F9E159D3 635CA631 22E4DCEE 1F6B6FE9 + F91F2E18 C9518AAF 713DC95A 3D39D496 Added: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc Thu May 5 08:17:05 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmJzh6YTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q0+4LD/wMGUzSXVcBCbUsVYtEtmoWjqBDZks7 +wN0SrnaI4UNXKlV0/rRbSMGRnVuqdwAlwJsb2RYNS56wswgTz9bhUB9cUUiSWftp +Pf5XE9LqarekEF48kSYv6XOGCoXIA4wa9BdfzBF8Q43kCI4WTRibv9xaMv+F60or +0xwgLl+8666M0L+Jg2tzrdI+cnkf42j07pL1HfqCsoZJSjxFmgSexXigZj+oSw+p +4bTTofAWUfj+jILpPw8s7Vnf0Gvi7YEGpfchUv9oB8N1LzKLyS1HYNLGSAqbE1vm +CvG9X8IzWQr4wIVqWSMWnsfImJL7EcA+G1SrUZP//d5UitvbF3ZZ5tMUvPYqgfKz +S7kwyxuI1/uQ6CpJ5vxdrQQfRauYA4oWws4jWf2O6xOF5VIB1F0aF0//SLdauR+r +GX4aYzQF+2DG6pIGJWYfrE9I4U4/LQLbdVVawItNnMKjphxD3Vi1kn9ITzJAtpLE +75T9wPvlqSY7bLQlpBLd2+mModF2K+Gonr8Z06Xe0kr/R+tyrjrP5Oa++egLcaFo +ZCr+L6WvkW8XnCfzU7T7d7wNKlskw7sh9BqOluMr+YW9rL+CKEYiM4JZrlUZCT3R +rcLnVX47qigSw+WETHtMLA/TWYS6FQpKqs49cYbWAAT2K6mvmPiM1MupZSo6HgS+ +/KROoSIKLGVTRA
[spark] branch branch-3.3 updated (94d3d6b5fce -> 1fa3171f387)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 94d3d6b5fce [SPARK-38891][SQL] Skipping allocating vector for repetition & definition levels when possible add 1fa3171f387 [SPARK-39060][SQL][3.3] Typo in error messages of decimal overflow No new revisions were added by this update. Summary of changes: .../src/main/scala/org/apache/spark/sql/types/Decimal.scala | 4 ++-- sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out | 2 +- .../sql-tests/results/ansi/decimalArithmeticOperations.sql.out| 8 .../src/test/resources/sql-tests/results/ansi/interval.sql.out| 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54271 - in /dev/spark: v3.3.0-rc1-bin/ v3.3.0-rc1-docs/
Author: maxgekk Date: Thu May 5 05:58:09 2022 New Revision: 54271 Log: Remove v3.3.0-rc1 Removed: dev/spark/v3.3.0-rc1-bin/ dev/spark/v3.3.0-rc1-docs/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54255 - in /dev/spark/v3.3.0-rc1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.0/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: maxgekk Date: Wed May 4 19:42:41 2022 New Revision: 54255 Log: Apache Spark v3.3.0-rc1 docs [This commit notification would consist of 2661 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38744][SQL][TESTS] Test the error class: NON_LITERAL_PIVOT_VALUES
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 8f0aca27916 [SPARK-38744][SQL][TESTS] Test the error class: NON_LITERAL_PIVOT_VALUES 8f0aca27916 is described below commit 8f0aca279168fba23695a4919a01b79dc776f21d Author: panbingkun AuthorDate: Wed May 4 21:49:44 2022 +0300 [SPARK-38744][SQL][TESTS] Test the error class: NON_LITERAL_PIVOT_VALUES ## What changes were proposed in this pull request? This PR aims to add a test for the error class NON_LITERAL_PIVOT_VALUES to `QueryCompilationErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36431 from panbingkun/SPARK-38744. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../sql/errors/QueryCompilationErrorsSuite.scala | 20 1 file changed, 20 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 252c7298cb5..40b18ad3cc7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -525,6 +525,26 @@ class QueryCompilationErrorsSuite msg = "Field name m.n is invalid: m is not a struct.; line 1 pos 27") } } + + test("NON_LITERAL_PIVOT_VALUES: literal expressions required for pivot values") { +val df = Seq( + ("dotNET", 2012, 1), + ("Java", 2012, 2), + ("dotNET", 2012, 5000), + ("dotNET", 2013, 48000), + ("Java", 2013, 3) +).toDF("course", "year", "earnings") + +checkErrorClass( + exception = intercept[AnalysisException] { +df.groupBy(df("course")). + pivot(df("year"), Seq($"earnings")). + agg(sum($"earnings")).collect() + }, + errorClass = "NON_LITERAL_PIVOT_VALUES", + msg = "Literal expressions required for pivot values, found 'earnings#\\w+'", + matchMsg = true) + } } class MyCastToString extends SparkUserDefinedFunction( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38733][SQL][TESTS] Test the error class: INCOMPATIBLE_DATASOURCE_REGISTER
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 834841ef5da [SPARK-38733][SQL][TESTS] Test the error class: INCOMPATIBLE_DATASOURCE_REGISTER 834841ef5da is described below commit 834841ef5dab150f249d4171fddb474251beecac Author: panbingkun AuthorDate: Wed May 4 14:59:12 2022 +0300 [SPARK-38733][SQL][TESTS] Test the error class: INCOMPATIBLE_DATASOURCE_REGISTER ## What changes were proposed in this pull request? This PR aims to add a test for the error class INCOMPATIBLE_DATASOURCE_REGISTER to `QueryExecutionErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite*" ``` Closes #36429 from panbingkun/SPARK-38733. 
Lead-authored-by: panbingkun Co-authored-by: Maxim Gekk Signed-off-by: Max Gekk --- .../sql/errors/QueryExecutionErrorsSuite.scala | 33 -- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index baa731571f7..7a5592c148a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -18,13 +18,14 @@ package org.apache.spark.sql.errors import java.io.IOException -import java.util.Locale +import java.net.URL +import java.util.{Locale, ServiceConfigurationError} import org.apache.hadoop.fs.{LocalFileSystem, Path} import org.apache.hadoop.fs.permission.FsPermission import test.org.apache.spark.sql.connector.JavaSimpleWritableDataSource -import org.apache.spark.{SparkArithmeticException, SparkException, SparkIllegalArgumentException, SparkIllegalStateException, SparkRuntimeException, SparkSecurityException, SparkUnsupportedOperationException, SparkUpgradeException} +import org.apache.spark.{SparkArithmeticException, SparkClassNotFoundException, SparkException, SparkIllegalArgumentException, SparkIllegalStateException, SparkRuntimeException, SparkSecurityException, SparkUnsupportedOperationException, SparkUpgradeException} import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, SaveMode} import org.apache.spark.sql.catalyst.util.BadRecordException import org.apache.spark.sql.connector.SimpleWritableDataSource @@ -485,6 +486,34 @@ class QueryExecutionErrorsSuite } } } + + test("INCOMPATIBLE_DATASOURCE_REGISTER: create table using an incompatible data source") { +val newClassLoader = new ClassLoader() { + + override def getResources(name: String): java.util.Enumeration[URL] = { +if 
(name.equals("META-INF/services/org.apache.spark.sql.sources.DataSourceRegister")) { + // scalastyle:off + throw new ServiceConfigurationError(s"Illegal configuration-file syntax: $name", +new NoClassDefFoundError("org.apache.spark.sql.sources.HadoopFsRelationProvider")) + // scalastyle:on throwerror +} else { + super.getResources(name) +} + } +} + +Utils.withContextClassLoader(newClassLoader) { + val e = intercept[SparkClassNotFoundException] { +sql("CREATE TABLE student (id INT, name STRING, age INT) USING org.apache.spark.sql.fake") + } + checkErrorClass( +exception = e, +errorClass = "INCOMPATIBLE_DATASOURCE_REGISTER", +msg = "Detected an incompatible DataSourceRegister. Please remove the incompatible library " + + "from classpath or upgrade it. Error: Illegal configuration-file syntax: " + + "META-INF/services/org.apache.spark.sql.sources.DataSourceRegister") +} + } } class FakeFileSystemSetPermission extends LocalFileSystem { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54250 - /dev/spark/v3.3.0-rc1-bin/
Author: maxgekk Date: Wed May 4 08:35:46 2022 New Revision: 54250 Log: Apache Spark v3.3.0-rc1 Added: dev/spark/v3.3.0-rc1-bin/ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc Wed May 4 08:35:46 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmJyOn4THG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q06HUD/9qNA8U+46FRVU0vZYPE1tA1ydBDQLf +LR2JWFa5/JpV2P8cu1iJFojnbLM1nUk5giDrJaNUeRKG7x5OUMyP3bDpoZQOn14V +U/hP3Lom5Ms/GXFw1h8lQv5Ijaq3/GZXaAQi1Ha1aafb9HTPPBkjy4YMLMHmGaIm +7N3q6eqGxQcVekopxZ73LmXwhWyRK2PEGgrlqbmvbs3CB+VEa/9qm0q8gRplKFQE +cAiCAu80BueS/pn90Tv77QmQDyXBTnlDG0hlrxkTLa4MIkmkbwaEwpRJSKlWIgRo +0emTcHMdwVa1kRICZk5gJ+ceGc/X0pChacv8aY/rP79rc4SprDP+iiQGlL6G38e1 +67h4KgpQFCzKUIidRCXoewbdIrX5VEf2Np+7XddYuMjA8SJML/Cg/2eKU24A+o9b +kOs3Xo/RRrpnGVtcLvWDUCfevATzwN23gmbEtC5L872IhLwd9lbIvaAbqI7LvjMf +VCT0+5rDztAYCjTviMQBPsqg7DjVrjBAUmci5zjXPIiXHCGyTRZDB4tTsm2a3A6R +iIsffD4HvzFJpsqOT1cs0/NrF29mBlc3IDEWWQYZ0Ig+zZo0dJ4ktxwKNF3yJCcz +kLBayDuc0pvzz2VmPs2WB6mwWvtlLxJJmc4nBlJhciOR8pghEnpKuvRqth9H+hWM +Cx9xgDFDm3EK2g== +=QBEZ +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 Wed May 4 08:35:46 2022 @@ -0,0 +1,3 @@ +SparkR_3.3.0.tar.gz: B1BC497B 9C52A984 346E2BBD 1BD74227 E5DA7830 3735D2FF + 99AE8E55 4543DCB9 F293847F B4781ACB B88369D8 27AF5DD9 + E0DDFEB1 0B9F7B26 21AC2569 3E3E26F2 Added: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc Wed May 4 08:35:46 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmJyOoATHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q0wkXD/sGsAKaIECEZja5U2CfmVNHcmT0jQnK +y/oAf+8rtT6vLXZ2EVuHFAdSPa4Rzq0gBnXcQk9gZcDXQhH/Hfu0CkTLZdgs0YUY +jZqKks6oBlNObjEh9G8BNyeRB/Q1efRVupBW1koyex43XHNBPTCwinqR/t6WY+C6 +zdUQ935yhvuPbk3qHAbuLwy/qE5xXNr7YskthIjNoh3md9viLDEeqE4SabPY+o6Z +WoMliOBQD2B1drIzmr97dqCcMh9mtDuy2dMLb5RFk/JD2XTAIk+w4FmFGdZ7CgIA +/gwTEtdst3jqqtPJ5YI2FJAedfY7WIYlBlD1+lxUHxqVcwA6dmYpVQhqsR3DYUKw +JMdd+SyG0IvyBWDe5wo+MG0dBodZVXnv6Ap9dFsF36BLnuN358S+EifbvakbuVD0 +trWLvjO/F1yGkYnTN49OCs8DXz23jPMSjDOZJLH8lvk7bl4lDRg7C8AmAfwLFocG +wTeSS7DxZoEOuQ2WVuclsMm5SCZNrjV6e7AHkyo34I+8Oo7O/tVC7+l8Q+2Qi1Ky +AEkcWsAiRvn3M83tmwP2qPp7FMixdEJqktr9GDhX1B75OIScu87pbmUw3t2fyYiY +S8wuQ0DU9hLLGQaFf6XQuVEr+/0OmIBXPPZoHZTG9o/94Wb0tFyxDfvxJkWrLcOF +l7/5dzJLvxrDkA
[spark] branch master updated: [SPARK-39060][SQL] Typo in error messages of decimal overflow
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 165ce4eb7d6 [SPARK-39060][SQL] Typo in error messages of decimal overflow 165ce4eb7d6 is described below commit 165ce4eb7d6d75201beb1bff879efa99fde24f94 Author: Vitalii Li AuthorDate: Wed May 4 09:41:53 2022 +0300 [SPARK-39060][SQL] Typo in error messages of decimal overflow ### What changes were proposed in this pull request? This PR removes extra curly bracket from debug string for Decimal type in SQL. ### Why are the changes needed? Typo in error messages of decimal overflow. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running updated test: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z decimalArithmeticOperations.sql" ``` Closes #36397 from vli-databricks/SPARK-39060. Authored-by: Vitalii Li Signed-off-by: Max Gekk --- .../src/main/scala/org/apache/spark/sql/types/Decimal.scala | 4 ++-- sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out | 2 +- .../sql-tests/results/ansi/decimalArithmeticOperations.sql.out| 8 .../src/test/resources/sql-tests/results/ansi/interval.sql.out| 2 +- .../apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 12ce7a30601..1eeaa46736e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -227,9 +227,9 @@ final class Decimal extends Ordered[Decimal] with Serializable { def toDebugString: String = { if (decimalVal.ne(null)) { - s"Decimal(expanded,$decimalVal,$precision,$scale})" + 
s"Decimal(expanded, $decimalVal, $precision, $scale)" } else { - s"Decimal(compact,$longVal,$precision,$scale})" + s"Decimal(compact, $longVal, $precision, $scale)" } } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 566e27a0e20..476ec158f1f 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -666,7 +666,7 @@ select cast('123.45' as decimal(4, 2)) struct<> -- !query output org.apache.spark.SparkArithmeticException -[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,123.45,5,2}) cannot be represented as Decimal(4, 2). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 123.45, 5, 2) cannot be represented as Decimal(4, 2). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('123.45' as decimal(4, 2)) ^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out index 1640875973e..d4b15d92952 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out @@ -76,7 +76,7 @@ select (5e36BD + 0.1) + 5e36BD struct<> -- !query output org.apache.spark.SparkArithmeticException -[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,10.1,39,1}) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 10.1, 39, 1) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. 
== SQL(line 1, position 7) == select (5e36BD + 0.1) + 5e36BD ^^^ @@ -88,7 +88,7 @@ select (-4e36BD - 0.1) - 7e36BD struct<> -- !query output org.apache.spark.SparkArithmeticException -[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,-11.1,39,1}) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, -11.1, 39, 1) cannot be represented as Decimal(38, 1).
[spark] branch branch-3.3 updated: [SPARK-39087][SQL][3.3] Improve messages of error classes
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new d3aadb40370 [SPARK-39087][SQL][3.3] Improve messages of error classes d3aadb40370 is described below commit d3aadb40370c0613c2d2ce41d8b905f0fafcd69c Author: Max Gekk AuthorDate: Wed May 4 08:45:03 2022 +0300 [SPARK-39087][SQL][3.3] Improve messages of error classes ### What changes were proposed in this pull request? In the PR, I propose to modify error messages of the following error classes: - INVALID_JSON_SCHEMA_MAP_TYPE - INCOMPARABLE_PIVOT_COLUMN - INVALID_ARRAY_INDEX_IN_ELEMENT_AT - INVALID_ARRAY_INDEX - DIVIDE_BY_ZERO This is a backport of https://github.com/apache/spark/pull/36428. ### Why are the changes needed? To improve readability of error messages. ### Does this PR introduce _any_ user-facing change? Yes. It changes user-facing error messages. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite*" $ build/sbt "sql/testOnly *QueryExecutionAnsiErrorsSuite" $ build/sbt "test:testOnly *SparkThrowableSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 040526391a45ad610422a48c05aa69ba5133f922) Signed-off-by: Max Gekk Closes #36439 from MaxGekk/error-class-improve-msg-3.3. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 12 - .../org/apache/spark/SparkThrowableSuite.scala | 2 +- .../spark/sql/errors/QueryCompilationErrors.scala | 6 ++--- .../expressions/ArithmeticExpressionSuite.scala| 30 +++--- .../expressions/CollectionExpressionsSuite.scala | 4 +-- .../catalyst/expressions/ComplexTypeSuite.scala| 4 +-- .../expressions/IntervalExpressionsSuite.scala | 10 .../expressions/StringExpressionsSuite.scala | 6 ++--- .../sql/catalyst/util/IntervalUtilsSuite.scala | 2 +- .../resources/sql-tests/results/ansi/array.sql.out | 24 - .../sql-tests/results/ansi/interval.sql.out| 4 +-- .../resources/sql-tests/results/interval.sql.out | 4 +-- .../test/resources/sql-tests/results/pivot.sql.out | 4 +-- .../sql-tests/results/postgreSQL/case.sql.out | 6 ++--- .../sql-tests/results/postgreSQL/int8.sql.out | 6 ++--- .../results/postgreSQL/select_having.sql.out | 2 +- .../results/udf/postgreSQL/udf-case.sql.out| 6 ++--- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- .../sql-tests/results/udf/udf-pivot.sql.out| 4 +-- .../apache/spark/sql/ColumnExpressionSuite.scala | 12 - .../org/apache/spark/sql/DataFrameSuite.scala | 2 +- .../apache/spark/sql/execution/SQLViewSuite.scala | 4 +-- .../sql/streaming/FileStreamSourceSuite.scala | 2 +- 23 files changed, 79 insertions(+), 79 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 463a5eae534..78934667ac0 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -37,7 +37,7 @@ "sqlState" : "22008" }, "DIVIDE_BY_ZERO" : { -"message" : [ "divide by zero. To return NULL instead, use 'try_divide'. If necessary set to false (except for ANSI interval type) to bypass this error." ], +"message" : [ "Division by zero. To return NULL instead, use `try_divide`. 
If necessary set to false (except for ANSI interval type) to bypass this error." ], "sqlState" : "22012" }, "DUPLICATE_KEY" : { @@ -72,7 +72,7 @@ "message" : [ "Grouping sets size cannot be greater than " ] }, "INCOMPARABLE_PIVOT_COLUMN" : { -"message" : [ "Invalid pivot column ''. Pivot columns must be comparable." ], +"message" : [ "Invalid pivot column . Pivot columns must be comparable." ], "sqlState" : "42000" }, "INCOMPATIBLE_DATASOURCE_REGISTER" : { @@ -89,10 +89,10 @@ "message" : [ "" ] }, "INVALID_ARRAY_INDEX" : { -"message" : [ "Invalid index: , numElements: . If necessary set to false to bypass this error." ] +"message" : [ "The index is out of bounds. The array has elements. If necessary set to false to bypass this error." ] }, "INVALID_ARRAY_
[spark] branch branch-3.3 updated (4177626e634 -> 0515536e6d1)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 4177626e634 [SPARK-35320][SQL][FOLLOWUP] Remove duplicated test add 482b7d54b52 Preparing Spark release v3.3.0-rc1 new 0515536e6d1 Preparing development version 3.3.1-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing development version 3.3.1-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 0515536e6d1b4819eeab59cecb9a045b1a0d3325 Author: Maxim Gekk AuthorDate: Tue May 3 18:15:51 2022 + Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87..0e449e841cf 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b..d12f2ad73fa 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e..842d63f5d38 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d75..f7d187bf952 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e7747..53f38df8851 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e..845f6659407 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370a..8e159089193 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3..1987c133285 100644 --- a
[spark] 01/01: Preparing Spark release v3.3.0-rc1
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to tag v3.3.0-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git commit 482b7d54b522c4d1e25f3e84eabbc78126f22a3d Author: Maxim Gekk AuthorDate: Tue May 3 18:15:45 2022 + Preparing Spark release v3.3.0-rc1 --- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 37 files changed, 38 insertions(+), 38 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 0f88fe4feaf..2e9c4d9960b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 15f7b8fa828..2a9acfa335e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT 
+3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index d652b6d1c8d..7b17e625d75 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index db36da4799f..c5c920e7747 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 9e0a202edd1..697b5a3928e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 068ef60b77f..ad2db11370a 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 5081579e38d..1a7bdee70f3 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 500f4083805..66dc93de059 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a
[spark] tag v3.3.0-rc1 created (now 482b7d54b52)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to tag v3.3.0-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git at 482b7d54b52 (commit) This tag includes the following new commits: new 482b7d54b52 Preparing Spark release v3.3.0-rc1 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39085][SQL] Move the error message of `INCONSISTENT_BEHAVIOR_CROSS_VERSION` to error-classes.json
1. To improve maintainability of error messages in one place. 2. To follow the general rule that bodies of error messages should be placed in the json file, and only parameters are passed from source code.
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 19 +- .../scala/org/apache/spark/SparkException.scala| 7 --- .../spark/sql/errors/QueryExecutionErrors.scala| 67 ++ .../resources/sql-tests/results/ansi/date.sql.out | 9 ++- .../results/ansi/datetime-parsing-invalid.sql.out | 24 +--- .../sql-tests/results/ansi/timestamp.sql.out | 18 -- .../test/resources/sql-tests/results/date.sql.out | 9 ++- .../results/datetime-formatting-invalid.sql.out| 66 ++--- .../results/datetime-parsing-invalid.sql.out | 24 +--- .../sql-tests/results/json-functions.sql.out | 6 +- .../resources/sql-tests/results/timestamp.sql.out | 18 -- .../results/timestampNTZ/timestamp-ansi.sql.out| 3 +- .../results/timestampNTZ/timestamp.sql.out | 3 +- .../native/stringCastAndExpressions.sql.out| 9 ++- .../sql/errors/QueryExecutionErrorsSuite.scala | 4 +- 15 files changed, 177 insertions(+), 109 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index eacbeec570f..24b50c4209a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -79,7 +79,24 @@ "message" : [ "Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. Error: " ] }, "INCONSISTENT_BEHAVIOR_CROSS_VERSION" : { -"message" : [ "You may get a different result due to the upgrading to Spark >= : " ] +"message" : [ "You may get a different result due to the upgrading to" ], +"subClass" : { + "DATETIME_PATTERN_RECOGNITION" : { +"message" : [ " Spark >= 3.0: \nFail to recognize pattern in the DateTimeFormatter. 1) You can set to 'LEGACY' to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html"; ] + }, + "FORMAT_DATETIME_BY_NEW_PARSER" : { +"message" : [ " Spark >= 3.0: \nFail to format it to in the new formatter. You can set\n to 'LEGACY' to restore the behavior before\nSpark 3.0, or set to 'CORRECTED' and treat it as an invalid datetime string.\n" ] + }, + "PARSE_DATETIME_BY_NEW_PARSER" : { +"message" : [ " Spark >= 3.0: \nFail to parse in the new parser. You can set to 'LEGACY' to restore the behavior before Spark 3.0, or set to 'CORRECTED' and treat it as an invalid datetime string." ] + }, + "READ_ANCIENT_DATETIME" : { +"message" : [ " Spark >= 3.0: \nreading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z\nfrom files can be ambiguous, as the
[spark] branch master updated: [SPARK-39087][SQL] Improve messages of error classes
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 040526391a4 [SPARK-39087][SQL] Improve messages of error classes 040526391a4 is described below commit 040526391a45ad610422a48c05aa69ba5133f922 Author: Max Gekk AuthorDate: Tue May 3 08:17:02 2022 +0300 [SPARK-39087][SQL] Improve messages of error classes ### What changes were proposed in this pull request? In the PR, I propose to modify error messages of the following error classes: - INVALID_JSON_SCHEMA_MAP_TYPE - INCOMPARABLE_PIVOT_COLUMN - INVALID_ARRAY_INDEX_IN_ELEMENT_AT - INVALID_ARRAY_INDEX - DIVIDE_BY_ZERO ### Why are the changes needed? To improve readability of error messages. ### Does this PR introduce _any_ user-facing change? Yes. It changes user-facing error messages. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite*" $ build/sbt "sql/testOnly *QueryExecutionAnsiErrorsSuite" $ build/sbt "test:testOnly *SparkThrowableSuite" ``` Closes #36428 from MaxGekk/error-class-improve-msg. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 12 - .../org/apache/spark/SparkThrowableSuite.scala | 4 +-- .../spark/sql/errors/QueryCompilationErrors.scala | 6 ++--- .../expressions/ArithmeticExpressionSuite.scala| 30 +++--- .../expressions/CollectionExpressionsSuite.scala | 4 +-- .../catalyst/expressions/ComplexTypeSuite.scala| 4 +-- .../expressions/IntervalExpressionsSuite.scala | 10 .../expressions/StringExpressionsSuite.scala | 6 ++--- .../sql/catalyst/util/IntervalUtilsSuite.scala | 2 +- .../resources/sql-tests/results/ansi/array.sql.out | 24 - .../sql-tests/results/ansi/interval.sql.out| 4 +-- .../resources/sql-tests/results/interval.sql.out | 4 +-- .../test/resources/sql-tests/results/pivot.sql.out | 4 +-- .../sql-tests/results/postgreSQL/case.sql.out | 6 ++--- .../sql-tests/results/postgreSQL/int8.sql.out | 6 ++--- .../results/postgreSQL/select_having.sql.out | 2 +- .../results/udf/postgreSQL/udf-case.sql.out| 6 ++--- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- .../sql-tests/results/udf/udf-pivot.sql.out| 4 +-- .../apache/spark/sql/ColumnExpressionSuite.scala | 12 - .../org/apache/spark/sql/DataFrameSuite.scala | 2 +- .../sql/errors/QueryCompilationErrorsSuite.scala | 10 +++- .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 8 +++--- .../sql/errors/QueryExecutionErrorsSuite.scala | 25 +- .../apache/spark/sql/execution/SQLViewSuite.scala | 4 +-- .../sql/streaming/FileStreamSourceSuite.scala | 2 +- 26 files changed, 101 insertions(+), 102 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index aa38f8b9747..eacbeec570f 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -34,7 +34,7 @@ "sqlState" : "22008" }, "DIVIDE_BY_ZERO" : { -"message" : [ "divide by zero. To return NULL instead, use 'try_divide'. 
If necessary set to false (except for ANSI interval type) to bypass this error." ], +"message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to false (except for ANSI interval type) to bypass this error." ], "sqlState" : "22012" }, "DUPLICATE_KEY" : { @@ -72,7 +72,7 @@ "message" : [ "Grouping sets size cannot be greater than " ] }, "INCOMPARABLE_PIVOT_COLUMN" : { -"message" : [ "Invalid pivot column ''. Pivot columns must be comparable." ], +"message" : [ "Invalid pivot column . Pivot columns must be comparable." ], "sqlState" : "42000" }, "INCOMPATIBLE_DATASOURCE_REGISTER" : { @@ -89,10 +89,10 @@ "message" : [ "" ] }, "INVALID_ARRAY_INDEX" : { -"message" : [ "Invalid index: , numElements: . If necessary set to false to bypass this error." ] +"message" : [ "The index is out of bounds. The array has elements. If necessary set to false to bypass this error." ] }, "INVALID_ARRAY_INDEX_IN_ELEMENT_AT" : { -"mes
[spark] branch master updated (81786a2e960 -> 501519e5a52)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 81786a2e960 [SPARK-38737][SQL][TESTS] Test the error classes: INVALID_FIELD_NAME add 501519e5a52 [SPARK-38729][SQL][TESTS] Test the error class: FAILED_SET_ORIGINAL_PERMISSION_BACK No new revisions were added by this update. Summary of changes: .../spark/sql/errors/QueryCompilationErrors.scala | 2 +- .../sql/errors/QueryExecutionErrorsSuite.scala | 34 +- 2 files changed, 34 insertions(+), 2 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38737][SQL][TESTS] Test the error classes: INVALID_FIELD_NAME
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 81786a2e960 [SPARK-38737][SQL][TESTS] Test the error classes: INVALID_FIELD_NAME 81786a2e960 is described below commit 81786a2e96018ded474b353c004ac2f63fde Author: panbingkun AuthorDate: Sun May 1 11:35:09 2022 +0300 [SPARK-38737][SQL][TESTS] Test the error classes: INVALID_FIELD_NAME ## What changes were proposed in this pull request? This PR aims to add a test for the error class INVALID_FIELD_NAME to `QueryCompilationErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36404 from panbingkun/SPARK-38737. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../spark/sql/errors/QueryCompilationErrorsSuite.scala | 14 ++ 1 file changed, 14 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 1115db07f21..8fffccbed40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -513,6 +513,20 @@ class QueryCompilationErrorsSuite msg = "Invalid pivot value 'struct(col1, dotnet, col2, Experts)': value data type " + "struct does not match pivot column data type int") } + + test("INVALID_FIELD_NAME: add a nested field for not struct parent") { +withTable("t") { + sql("CREATE TABLE t(c struct, m string) USING parquet") + + val e = intercept[AnalysisException] { +sql("ALTER TABLE t ADD COLUMNS (m.n int)") + } + checkErrorClass( +exception = e, +errorClass = "INVALID_FIELD_NAME", +msg = "Field name m.n is invalid: m is not a struct.; line 1 pos 27") +} + } } class MyCastToString extends SparkUserDefinedFunction( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38700][SQL] Use error classes in the execution errors of save mode
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new b30d1d41414 [SPARK-38700][SQL] Use error classes in the execution errors of save mode b30d1d41414 is described below commit b30d1d41414e200f1cc7ec9675e5c013bdf5b214 Author: panbingkun AuthorDate: Sun May 1 10:34:31 2022 +0300 [SPARK-38700][SQL] Use error classes in the execution errors of save mode ### What changes were proposed in this pull request? Migrate the following errors in QueryExecutionErrors: * unsupportedSaveModeError -> UNSUPPORTED_SAVE_MODE ### Why are the changes needed? Porting execution errors of unsupported saveMode to new error framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new UT. Closes #36350 from panbingkun/SPARK-38700. Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 .../main/scala/org/apache/spark/ErrorInfo.scala| 6 ++--- .../spark/sql/errors/QueryExecutionErrors.scala| 9 +-- .../InsertIntoHadoopFsRelationCommand.scala| 2 +- .../sql/errors/QueryExecutionErrorsSuite.scala | 31 -- 5 files changed, 51 insertions(+), 8 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 4908a9b6c2e..aa38f8b9747 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -246,6 +246,17 @@ "UNSUPPORTED_GROUPING_EXPRESSION" : { "message" : [ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup" ] }, + "UNSUPPORTED_SAVE_MODE" : { +"message" : [ "The save mode is not supported for: " ], +"subClass" : { + "EXISTENT_PATH" : { +"message" : [ "an existent path." ] + }, + "NON_EXISTENT_PATH" : { +"message" : [ "a not existent path." 
] + } +} + }, "UNTYPED_SCALA_UDF" : { "message" : [ "You're using untyped Scala UDF, which does not have the input type information. Spark may blindly pass null to the Scala closure with primitive-type argument, and the closure will see the default value of the Java type for the null argument, e.g. `udf((x: Int) => x, IntegerType)`, the result is 0 for null input. To get rid of this error, you could:\n1. use typed Scala UDF APIs(without return type parameter), e.g. `udf((x: Int) => x)`\n2. use Java UDF APIs, e.g. `udf(ne [...] }, diff --git a/core/src/main/scala/org/apache/spark/ErrorInfo.scala b/core/src/main/scala/org/apache/spark/ErrorInfo.scala index a21f33e8833..0447572bb1c 100644 --- a/core/src/main/scala/org/apache/spark/ErrorInfo.scala +++ b/core/src/main/scala/org/apache/spark/ErrorInfo.scala @@ -80,9 +80,9 @@ private[spark] object SparkThrowableHelper { val errorSubInfo = subClass.getOrElse(subErrorClass, throw new IllegalArgumentException(s"Cannot find sub error class '$subErrorClass'")) val subMessageParameters = messageParameters.tail - "[" + errorClass + "." + subErrorClass + "] " + errorInfo.messageFormat + - String.format(errorSubInfo.messageFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), - subMessageParameters: _*) + "[" + errorClass + "." 
+ subErrorClass + "] " + String.format((errorInfo.messageFormat + +errorSubInfo.messageFormat).replaceAll("<[a-zA-Z0-9_-]+>", "%s"), +subMessageParameters: _*) } else { "[" + errorClass + "] " + String.format( errorInfo.messageFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 225315d3f02..4b8d76e8e6f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -592,8 +592,13 @@ object QueryExecutionErrors extends QueryErrorsBase { """.stripMargin) } - def unsupportedSaveModeError(saveMode: String, pathExists: Boolean): Throwable = { -new IllegalStateException(s"unsupported save mode $saveMode ($pathExists)") + def saveModeUnsupportedError(saveMode: Any, pathExists: Boolean): Throwable = { +pathEx
[spark] branch master updated: [SPARK-38748][SQL][TESTS] Test the error class: PIVOT_VALUE_DATA_TYPE_MISMATCH
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 30a2d9bd3a0 [SPARK-38748][SQL][TESTS] Test the error class: PIVOT_VALUE_DATA_TYPE_MISMATCH 30a2d9bd3a0 is described below commit 30a2d9bd3a0fbf19d6862f9a0904457fac16ff5d Author: panbingkun AuthorDate: Fri Apr 29 09:29:35 2022 +0300 [SPARK-38748][SQL][TESTS] Test the error class: PIVOT_VALUE_DATA_TYPE_MISMATCH ## What changes were proposed in this pull request? This PR aims to add a test for the error class PIVOT_VALUE_DATA_TYPE_MISMATCH to `QueryCompilationErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36400 from panbingkun/SPARK-38748. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../sql/errors/QueryCompilationErrorsSuite.scala | 24 +- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index ec8edd2acd6..1115db07f21 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.errors import org.apache.spark.sql.{AnalysisException, IntegratedUDFTestUtils, QueryTest, Row} import org.apache.spark.sql.api.java.{UDF1, UDF2, UDF23Test} import org.apache.spark.sql.expressions.SparkUserDefinedFunction -import org.apache.spark.sql.functions.{grouping, grouping_id, sum, udf} +import org.apache.spark.sql.functions.{grouping, grouping_id, lit, struct, sum, udf} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{IntegerType, MapType, StringType, StructField, StructType} @@ -491,6 +491,28 @@ class QueryCompilationErrorsSuite msg = "Field name c.X is ambiguous and has 2 matching fields in the struct.; line 1 pos 0") } } + + test("PIVOT_VALUE_DATA_TYPE_MISMATCH: can't cast pivot value data type (struct) " + +"to pivot column data type (int)") { +val df = Seq( + ("dotNET", 2012, 1), + ("Java", 2012, 2), + ("dotNET", 2012, 5000), + ("dotNET", 2013, 48000), + ("Java", 2013, 3) +).toDF("course", "year", "earnings") + +checkErrorClass( + exception = intercept[AnalysisException] { +df.groupBy(df("course")).pivot(df("year"), Seq( + struct(lit("dotnet"), lit("Experts")), + struct(lit("java"), lit("Dummies". 
+ agg(sum($"earnings")).collect() + }, + errorClass = "PIVOT_VALUE_DATA_TYPE_MISMATCH", + msg = "Invalid pivot value 'struct(col1, dotnet, col2, Experts)': value data type " + +"struct does not match pivot column data type int") + } } class MyCastToString extends SparkUserDefinedFunction( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39050][SQL] Error class: UNSUPPORTED_OPERATION to UNSUPPORTED_FEATURE
This affects three errors: ARROW TIMESTAMP, ORC TIMESTAMP TO TIMESTAMP_NTZ and ORC TIMESTAMP_NTZ TO TIMESTAMP
] + }, "PANDAS_UDAF_IN_PIVOT" : { "message" : [ "Pandas user defined aggregate function in the PIVOT clause." ] }, @@ -243,9 +246,6 @@ "UNSUPPORTED_GROUPING_EXPRESSION" : { "message" : [ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup" ] }, - "UNSUPPORTED_OPERATION" : { -"message" : [ "The operation is not supported: " ] - }, "UNTYPED_SCALA_UDF" : { "message" : [ "You're using untyped Scala UDF, which does not have the input type information. Spark may blindly pass null to the Scala closure with primitive-type argument, and the closure will see the default value of the Java type for the null argument, e.g. `udf((x: Int) => x, IntegerType)`, the result is 0 for null input. To get rid of this error, you could:\n1. use typed Scala UDF APIs(without return type parameter), e.g. `udf((x: Int) => x)`\n2. use Java UDF APIs, e.g. `udf(ne [...] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index d5e42a1dde7..225315d3f02 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -68,6 +68,11 @@ import org.apache.spark.util.CircularBuffer */ object QueryExecutionErrors extends QueryErrorsBase { + def internalMissingTimezoneIdError(): Throwable = { +new SparkIllegalStateException(errorClass = "INTERNAL_ERROR", + messageParameters = Array("Missing timezoneId where it is mandatory.")) + } + def logicalHintOperatorNotRemovedDuringAnalysisError(): Throwable = { new SparkIllegalStateException(errorClass = "INTERNAL_ERROR", messageParameters = Array( @@ -1614,15 +1619,6 @@ object QueryExecutionErrors extends QueryErrorsBase { new SparkException(s"Can not load in UserDefinedType ${name} for user class ${userClass}.") } - def timeZoneIdNotSpecifiedForTimestampTypeError(): Throwable = { -new 
SparkUnsupportedOperationException( - errorClass = "UNSUPPORTED_OPERATION", - messageParameters = Array( -s"${toSQLType(TimestampType)} must supply timeZoneId parameter " + - s"while converting to the arrow timestamp type.") -) - } - def notPublicClassError(name: String): Throwable = { new UnsupportedOperationException( s"$name is not a public class. Only public classes are supported.") @@ -1936,18 +1932
[spark] branch master updated: [SPARK-38718][SQL][TESTS] Test the error class: AMBIGUOUS_FIELD_NAME
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 581c801e02f [SPARK-38718][SQL][TESTS] Test the error class: AMBIGUOUS_FIELD_NAME 581c801e02f is described below commit 581c801e02f97712545399f37ce6e7acac7af5b5 Author: panbingkun AuthorDate: Thu Apr 28 23:29:17 2022 +0300 [SPARK-38718][SQL][TESTS] Test the error class: AMBIGUOUS_FIELD_NAME ## What changes were proposed in this pull request? This PR aims to add a test for the error class AMBIGUOUS_FIELD_NAME to `QueryCompilationErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36395 from panbingkun/SPARK-38718. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../spark/sql/errors/QueryCompilationErrorsSuite.scala| 15 +++ 1 file changed, 15 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 2d1e6f94925..ec8edd2acd6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -476,6 +476,21 @@ class QueryCompilationErrorsSuite checkAnswer(sql("SELECT __auto_generated_subquery_name.i from (SELECT i FROM v)"), Row(1)) } } + + test("AMBIGUOUS_FIELD_NAME: alter column matching multi fields in the struct") { +withTable("t") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { +sql("CREATE TABLE t(c struct) USING parquet") + } + + checkErrorClass( +exception = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN c.X COMMENT 'new comment'") +}, +errorClass = "AMBIGUOUS_FIELD_NAME", +msg = "Field name c.X is ambiguous and has 2 matching fields in the struct.; line 1 pos 0") +} + } } class MyCastToString extends SparkUserDefinedFunction( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (ec2bfa566ed -> ecade78526b)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from ec2bfa566ed [SPARK-39055][DOC] Fix documentation 404 page add ecade78526b [SPARK-38741][SQL][TESTS] Test the error class: MAP_KEY_DOES_NOT_EXIST No new revisions were added by this update. Summary of changes: .../sql/errors/QueryExecutionAnsiErrorsSuite.scala| 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39052][SQL] Support Literal.create(Char, StringType)
To make the support of external type `Char` same as `Literal.apply`.
Authored-by: Hyukjin Kwon Signed-off-by: Max Gekk --- .../scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala | 1 + .../org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala | 1 + .../spark/sql/catalyst/expressions/LiteralExpressionSuite.scala | 4 3 files changed, 6 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index 3e6d31e79b7..263d3734217 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -499,6 +499,7 @@ object CatalystTypeConverters { */ def convertToCatalyst(a: Any): Any = a match { case s: String => StringConverter.toCatalyst(s) +case c: Char => StringConverter.toCatalyst(c.toString) case d: Date => DateConverter.toCatalyst(d) case ld: LocalDate => LocalDateConverter.toCatalyst(ld) case t: Timestamp => TimestampConverter.toCatalyst(t) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala index b559e219882..bf194a2288b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala @@ -152,6 +152,7 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper { val converter = CatalystTypeConverters.createToCatalystConverter(StringType) val expected = UTF8String.fromString("X") assert(converter(chr) === expected) +assert(CatalystTypeConverters.convertToCatalyst('a') === UTF8String.fromString("a")) } test("SPARK-33390: Make Literal support char array") { diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index 6ce51f1eec8..80e7a3206aa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -247,6 +247,10 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { // scalastyle:on } + test("SPARK-39052: Support Char in Literal.create") { +checkEvaluation(Literal.create('a', StringType), "a") + } + test("construct literals from java.time.LocalDate") { Seq( LocalDate.of(1, 1, 1), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39047][SQL] Replace the error class ILLEGAL_SUBSTRING by INVALID_PARAMETER_VALUE
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 9dcc24c36f6 [SPARK-39047][SQL] Replace the error class ILLEGAL_SUBSTRING by INVALID_PARAMETER_VALUE 9dcc24c36f6 is described below commit 9dcc24c36f6fcdf43bf66fe50415be575f7b2918 Author: Max Gekk AuthorDate: Thu Apr 28 07:46:44 2022 +0300 [SPARK-39047][SQL] Replace the error class ILLEGAL_SUBSTRING by INVALID_PARAMETER_VALUE ### What changes were proposed in this pull request? In the PR, I propose to remove the `ILLEGAL_SUBSTRING` error class, and use `INVALID_PARAMETER_VALUE` in the case when the `strfmt` parameter of the `format_string()` function contains `%0$`. The last value is handled differently by JDKs: _"... Java 8 and Java 11 uses it as "%1$", and Java 17 throws IllegalFormatArgumentIndexException(Illegal format argument index = 0)"_. ### Why are the changes needed? To improve code maintenance and user experience with Spark SQL by reducing the number of user-facing error classes. ### Does this PR introduce _any_ user-facing change? Yes, it changes user-facing error message. Before: ```sql spark-sql> select format_string('%0$s', 'Hello'); Error in query: [ILLEGAL_SUBSTRING] The argument_index of string format cannot contain position 0$.; line 1 pos 7 ``` After: ```sql spark-sql> select format_string('%0$s', 'Hello'); Error in query: [INVALID_PARAMETER_VALUE] The value of parameter(s) 'strfmt' in `format_string` is invalid: expects %1$, %2$ and so on, but got %0$.; line 1 pos 7 ``` ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *SparkThrowableSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z text.sql" $ build/sbt "test:testOnly *QueryCompilationErrorsSuite" ``` Closes #36380 from MaxGekk/error-class-ILLEGAL_SUBSTRING. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 3 --- .../apache/spark/sql/catalyst/expressions/stringExpressions.scala | 3 +-- .../scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala | 7 --- .../src/test/resources/sql-tests/results/postgreSQL/text.sql.out | 2 +- .../org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala | 7 --- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 673866e6c35..4738599685b 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -71,9 +71,6 @@ "GROUPING_SIZE_LIMIT_EXCEEDED" : { "message" : [ "Grouping sets size cannot be greater than " ] }, - "ILLEGAL_SUBSTRING" : { -"message" : [ " cannot contain ." ] - }, "INCOMPARABLE_PIVOT_COLUMN" : { "message" : [ "Invalid pivot column ''. Pivot columns must be comparable." 
], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 976caeb3502..9089ff46637 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1898,8 +1898,7 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC */ private def checkArgumentIndexNotZero(expression: Expression): Unit = expression match { case StringLiteral(pattern) if pattern.contains("%0$") => - throw QueryCompilationErrors.illegalSubstringError( -"The argument_index of string format", "position 0$") + throw QueryCompilationErrors.zeroArgumentIndexError() case _ => // do nothing } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 7f212ed5891..3d379fb4f71 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -66,10 +66,11 @@ object QueryCompilationErrors extends QueryErrorsBase { messageParameters = Array(sizeLimit.toString))
[spark] branch branch-3.3 updated (b3ecff34ab6 -> b25276f4385)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from b3ecff34ab6 [SPARK-34079][SQL][FOLLOW-UP] Revert some changes in InjectRuntimeFilterSuite add b25276f4385 [SPARK-39015][SQL][3.3] Remove the usage of toSQLValue(v) without an explicit type No new revisions were added by this update. Summary of changes: .../spark/sql/catalyst/expressions/Cast.scala | 58 -- .../expressions/complexTypeExtractors.scala| 5 +- .../spark/sql/catalyst/util/DateTimeUtils.scala| 14 -- .../spark/sql/catalyst/util/IntervalUtils.scala| 23 + .../apache/spark/sql/errors/QueryErrorsBase.scala | 14 ++ .../spark/sql/errors/QueryExecutionErrors.scala| 47 ++ .../scala/org/apache/spark/sql/types/Decimal.scala | 21 +--- .../org/apache/spark/sql/types/numerics.scala | 13 +++-- .../catalyst/expressions/AnsiCastSuiteBase.scala | 3 +- .../test/resources/sql-tests/inputs/ansi/map.sql | 1 + .../resources/sql-tests/results/ansi/map.sql.out | 14 +- 11 files changed, 125 insertions(+), 88 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (e49147af4a8 -> 4e84f339973)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from e49147af4a8 [SPARK-39015][SQL] Remove the usage of toSQLValue(v) without an explicit type add 4e84f339973 [SPARK-39027][SQL] Output SQL statements in error messages in upper case and w/o double quotes No new revisions were added by this update. Summary of changes: python/pyspark/sql/tests/test_udf.py | 2 +- .../apache/spark/sql/errors/QueryErrorsBase.scala| 3 +-- .../ExtractPythonUDFFromJoinConditionSuite.scala | 2 +- .../resources/sql-tests/results/describe.sql.out | 4 ++-- .../sql/errors/QueryCompilationErrorsSuite.scala | 6 +++--- .../spark/sql/errors/QueryParsingErrorsSuite.scala | 20 ++-- .../spark/sql/execution/command/DDLParserSuite.scala | 4 ++-- 7 files changed, 20 insertions(+), 21 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (d05e01d5402 -> e49147af4a8)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from d05e01d5402 [SPARK-34079][SQL][FOLLOW-UP] Revert some changes in InjectRuntimeFilterSuite add e49147af4a8 [SPARK-39015][SQL] Remove the usage of toSQLValue(v) without an explicit type No new revisions were added by this update. Summary of changes: .../spark/sql/catalyst/expressions/Cast.scala | 58 -- .../expressions/complexTypeExtractors.scala| 5 +- .../spark/sql/catalyst/util/DateTimeUtils.scala| 14 -- .../spark/sql/catalyst/util/IntervalUtils.scala| 23 + .../apache/spark/sql/errors/QueryErrorsBase.scala | 14 ++ .../spark/sql/errors/QueryExecutionErrors.scala| 47 ++ .../scala/org/apache/spark/sql/types/Decimal.scala | 21 +--- .../org/apache/spark/sql/types/numerics.scala | 13 +++-- .../catalyst/expressions/AnsiCastSuiteBase.scala | 3 +- .../test/resources/sql-tests/inputs/ansi/map.sql | 1 + .../resources/sql-tests/results/ansi/map.sql.out | 14 +- .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 5 +- 12 files changed, 128 insertions(+), 90 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39028][SQL] Use SparkDateTimeException when casting to datetime types failed
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new ead45889278 [SPARK-39028][SQL] Use SparkDateTimeException when casting to datetime types failed ead45889278 is described below commit ead45889278e8c5f71dc2ff2c7b020592e5e897f Author: Gengliang Wang AuthorDate: Tue Apr 26 22:06:07 2022 +0300 [SPARK-39028][SQL] Use SparkDateTimeException when casting to datetime types failed ### What changes were proposed in this pull request? Use SparkDateTimeException when casting to datetime types failed ### Why are the changes needed? It is more reasonable to throw `SparkDateTimeException` instead of `java.time.DateTimeException` ### Does this PR introduce _any_ user-facing change? Yes, a minor change for the exception type. ### How was this patch tested? UT Closes #36362 from gengliangwang/datetimeException. Authored-by: Gengliang Wang Signed-off-by: Max Gekk --- .../spark/sql/errors/QueryExecutionErrors.scala | 5 ++--- .../resources/sql-tests/results/ansi/cast.sql.out| 20 ++-- .../resources/sql-tests/results/ansi/date.sql.out| 4 ++-- .../results/ansi/datetime-parsing-invalid.sql.out| 8 .../sql-tests/results/ansi/interval.sql.out | 16 .../results/postgreSQL/window_part3.sql.out | 2 +- .../results/timestampNTZ/timestamp-ansi.sql.out | 4 ++-- 7 files changed, 29 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 59172682925..dd45f62ac09 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1019,9 +1019,8 @@ object QueryExecutionErrors extends QueryErrorsBase { } else { toSQLValue(value) } -new 
DateTimeException(s"Invalid input syntax for type ${toSQLType(to)}: $valueString. " + - s"To return NULL instead, use 'try_cast'. If necessary set ${SQLConf.ANSI_ENABLED.key} " + - s"to false to bypass this error." + errorContext) +new SparkDateTimeException("INVALID_SYNTAX_FOR_CAST", + Array(toSQLType(to), valueString, SQLConf.ANSI_ENABLED.key, errorContext)) } def registeringStreamingQueryListenerError(e: Exception): Throwable = { diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 96db4f2db42..566e27a0e20 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -697,8 +697,8 @@ select cast('a' as date) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "DATE": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +[INVALID_SYNTAX_FOR_CAST] Invalid input syntax for type "DATE": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast('a' as date) ^ @@ -717,8 +717,8 @@ select cast('a' as timestamp) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +[INVALID_SYNTAX_FOR_CAST] Invalid input syntax for type "TIMESTAMP": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. 
== SQL(line 1, position 7) == select cast('a' as timestamp) ^^ @@ -737,8 +737,8 @@ select cast('a' as timestamp_ntz) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP_NTZ": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +[INVALID_SYNTAX_FOR_CAST] Invalid input syntax for type "TIMESTAMP_NTZ": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql
[spark] branch master updated: [SPARK-38742][SQL][TESTS] Move the tests `MISSING_COLUMN` from SQLQuerySuite to QueryCompilationErrorsSuite
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new da51dc7aa76 [SPARK-38742][SQL][TESTS] Move the tests `MISSING_COLUMN` from SQLQuerySuite to QueryCompilationErrorsSuite da51dc7aa76 is described below commit da51dc7aa7674f158fb82f9f735af7d46f6a9399 Author: panbingkun AuthorDate: Mon Apr 25 21:53:17 2022 +0300 [SPARK-38742][SQL][TESTS] Move the tests `MISSING_COLUMN` from SQLQuerySuite to QueryCompilationErrorsSuite ### What changes were proposed in this pull request? This pr aims to move tests for the error class MISSING_COLUMN from SQLQuerySuite to QueryCompilationErrorsSuite, it's a followup of SPARK-37935. ### Why are the changes needed? To improve code maintenance. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? By running the moved tests: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36280 from panbingkun/SPARK-38742. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 38 - .../sql/errors/QueryCompilationErrorsSuite.scala | 63 ++ 2 files changed, 63 insertions(+), 38 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 70b38db034f..4d384d3286b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1114,31 +1114,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark ) } - test("SPARK-17863: SELECT distinct does not work correctly if order by missing attribute") { -checkAnswer( - sql("""select distinct struct.a, struct.b - |from ( - | select named_struct('a', 1, 'b', 2, 'c', 3) as struct - | union all - | select named_struct('a', 1, 'b', 2, 'c', 4) as struct) tmp - |order by a, b - |""".stripMargin), - Row(1, 2) :: Nil) - -val error = intercept[AnalysisException] { - sql("""select distinct struct.a, struct.b -|from ( -| select named_struct('a', 1, 'b', 2, 'c', 3) as struct -| union all -| select named_struct('a', 1, 'b', 2, 'c', 4) as struct) tmp -|order by struct.a, struct.b -|""".stripMargin) -} -assert(error.getErrorClass == "MISSING_COLUMN") -assert(error.messageParameters.sameElements(Array("struct.a", "a, b"))) - - } - test("cast boolean to string") { // TODO Ensure true/false string letter casing is consistent with Hive in all cases. 
checkAnswer( @@ -2734,19 +2709,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } - test("SPARK-21335: support un-aliased subquery") { -withTempView("v") { - Seq(1 -> "a").toDF("i", "j").createOrReplaceTempView("v") - checkAnswer(sql("SELECT i from (SELECT i FROM v)"), Row(1)) - - val e = intercept[AnalysisException](sql("SELECT v.i from (SELECT i FROM v)")) - assert(e.getErrorClass == "MISSING_COLUMN") - assert(e.messageParameters.sameElements(Array("v.i", "__auto_generated_subquery_name.i"))) - - checkAnswer(sql("SELECT __auto_generated_subquery_name.i from (SELECT i FROM v)"), Row(1)) -} - } - test("SPARK-21743: top-most limit should not cause memory leak") { // In unit test, Spark will fail the query if memory leak detected. spark.range(100).groupBy("id").count().limit(1).collect() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 8b63ba52ab8..f1325a68366 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -409,6 +409,69 @@ class QueryCompilationErrorsSuite "can only contain StringType as a key type for a MapType." ) } + + test("MISSING_COLUMN: SELECT distinct does not work correctly " + +"if order by missing attribute") { +checkAnswer( + sql( +&quo