svn commit: r54854 - /dev/spark/v3.3.0-rc5-bin/
Author: maxgekk Date: Sat Jun 4 09:15:39 2022 New Revision: 54854 Log: Apache Spark v3.3.0-rc5 Added: dev/spark/v3.3.0-rc5-bin/ dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc5-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc5-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc5-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc5-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.asc Sat Jun 4 09:15:39 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKbIlYTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q02YyD/wPYvJ8VvweSAAYMFrbIU66mZdfqcS3 +IXY1KjmwzirOnWPN9ovrcwLjHKFkWtsOLVDum4x4ffQoZUBKn+4xz/FGQx5j70Uw +pJ+GXXWQSBrhIs/CApCDz98Fx4KX+u6d/0qr5fpftOPJIQn/D9nwPOlF3NBaIhOv +jYqvZKEXqVgkZ/TSVnFHP4BRUYW7norV3F6s229KVvdvHE1wlUt3TnMk6ouSEmgM +a4AZXxryhD0BtkB+9+8WmVWjYQjX8NlPp4wB7fi2p4RfSLKDiDWP+ompSqwh65GD +k1hc32cgEQIAeVrh5O1ssT65PPOjNacVKzjRojujo1esKFyzaBdl6Ew04aCtsliF +mEO9XI1Jh+NijDnhxcRA8gck+hI/AqDf9aXUcSWyQm/BclygtHs/UXPYWPhgi1jm +4pGPBtsYOESCU0PewomFbwU36nMX/roLPJRGxk3m1ItxxU9FfPEYpRCaBa4KQxzB +g4gtAlYs3CokukoskZMl6nF22CnCaB/1PzaFExp7Tys9UgX6Pv5Vf0gwc94wRvKK +9RWtbAsvL6Cqd5pwu8cmauS3++BIGq6r09bzLvL5hG2fQwZ4jrIZqyFBSoxghSTb +uulNdjthA+c82How6/ACzTrTrtTh8LammrIuX95E545fMdDGBO2DYfjrK1g2BprC +fKF4UyooOJUriA== +=YOtt +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc5-bin/SparkR_3.3.0.tar.gz.sha512 Sat Jun 4 09:15:39 2022 @@ -0,0 +1 @@ +1b9fb801c955e1038122804678defdd2eaba0f26dd501e09a2f61e13c77b9292bf14ca9c25a8561ce23ff4ee50ebad6d047a34696394f520f059f8e32dc91a9a SparkR_3.3.0.tar.gz Added: dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc5-bin/pyspark-3.3.0.tar.gz.asc Sat Jun 4 09:15:39 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKbIlkTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q01pkEACwe03A1jrjWnAN6evlwk0xxMugbZI+ +2xNUuHOAPNc6Z1rsYuZnh8WCHKVo/Ik0JEdpDAPQDGqC1Pwn4l4LFf9c6BiTTCRS +14VsiJrERpzzBNT8lqVIT09Z2esLFjTiw7S/tXFwkSNPT6o+IZb3KxuTm6XREc1Q +QmsbC/EfOmqxSlTdBf3Dq7T2RSSNyFHOLwdgtPUWNxSXhKGzQd6WYceUx2aCGkrv +u/TGoPhQL+F15EmhrK5Pfrycvo4UbJrsWzBswUeQFbJ3klyQlPvOfdm/VZhWzG/a +XGggZmTFiPEdFRJ9FRnArK9lng/8uUME/2Am9WTU28dkFRiaND/CARJ9NvYKyYIR +TBOudzm+advHgOjiHS1FWLXG9sHdGvgjwFe/g3byzPqiCl2LmPencXCgH0lmRd/x +H7HFp4nRQtWIVByedwSeFGJS4zZh42fWg4h7K6iP8dP4ZoepcuPGZw6qIi0P+tFh +ATTimLDx28LhsiaRE7QP2xvYXI0yCIjeDLPGgbM9rpUapqwUMTcuDUtnFSKzV7QW +Ly+jJpyBL6lSAy7N7e4mpCm8yEep/sdPCL/H7XF9cHCEV5Afnh/vqG63jXKxZYgz +vRTW5oDMCn/mpxt8NxQXtiu7iXNJvAIPPJZWclCSoTBfkueQhyRCypYXF5//O6l8 +YZ4yF8LA+z0gDA== +=2iWV +-END PGP SIGNATURE- Added: dev/spark
[spark] 01/01: Preparing development version 3.3.1-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit bf3c472ff87ab7ec17f55e4730d6c6c9a7f299ad Author: Maxim Gekk AuthorDate: Sat Jun 4 06:43:12 2022 + Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87..0e449e841cf 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b..d12f2ad73fa 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e..842d63f5d38 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d75..f7d187bf952 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e7747..53f38df8851 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e..845f6659407 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370a..8e159089193 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3..1987c133285 100644 --- a
[spark] branch branch-3.3 updated (b7e95bad882 -> bf3c472ff87)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from b7e95bad882 [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries add 7cf29705272 Preparing Spark release v3.3.0-rc5 new bf3c472ff87 Preparing development version 3.3.1-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] tag v3.3.0-rc5 created (now 7cf29705272)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to tag v3.3.0-rc5 in repository https://gitbox.apache.org/repos/asf/spark.git at 7cf29705272 (commit) This tag includes the following new commits: new 7cf29705272 Preparing Spark release v3.3.0-rc5 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing Spark release v3.3.0-rc5
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to tag v3.3.0-rc5 in repository https://gitbox.apache.org/repos/asf/spark.git commit 7cf29705272ab8e8c70e8885a3664ad8ae3cd5e9 Author: Maxim Gekk AuthorDate: Sat Jun 4 06:43:05 2022 + Preparing Spark release v3.3.0-rc5 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf..9479bb3bf87 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fa..2e9c4d9960b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d38..2a9acfa335e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf952..7b17e625d75 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df8851..c5c920e7747 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407..697b5a3928e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e159089193..ad2db11370a 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c133285..1a7bdee70f3 100644 --- a/common/tags
[spark] branch branch-3.3 updated: [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new b7e95bad882 [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries b7e95bad882 is described below commit b7e95bad882482168b7dd301fcfa3daf80477a7a Author: Josh Rosen AuthorDate: Sat Jun 4 09:12:42 2022 +0300 [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries ### What changes were proposed in this pull request? This is a followup to #36654. That PR modified the existing `QueryPlan.transformDownWithSubqueries` to add additional arguments for tree pattern pruning. In this PR, I roll back the change to that method's signature and instead add a new `transformDownWithSubqueriesAndPruning` method. ### Why are the changes needed? The original change breaks binary and source compatibility in Catalyst. Technically speaking, Catalyst APIs are considered internal to Spark and are subject to change between minor releases (see [source](https://github.com/apache/spark/blob/bb51add5c79558df863d37965603387d40cc4387/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/package.scala#L20-L24)), but I think it's nice to try to avoid API breakage when possible. While trying to compile some custom Catalyst code, I ran into issues when trying to call the `transformDownWithSubqueries` method without supplying a tree pattern filter condition. If I do `transformDownWithSubqueries() { f} ` then I get a compilation error. I think this is due to the first parameter group containing all default parameters. My PR's solution of adding a new `transformDownWithSubqueriesAndPruning` method solves this problem. It's also more consistent with the naming convention used for other pruning-enabled tree transformation methods. 
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #36765 from JoshRosen/SPARK-39259-binary-compatibility-followup. Authored-by: Josh Rosen Signed-off-by: Max Gekk (cherry picked from commit eda6c4b9987f0515cb0aae4686c8a0ae0a3987d4) Signed-off-by: Max Gekk --- .../sql/catalyst/optimizer/finishAnalysis.scala| 2 +- .../spark/sql/catalyst/plans/QueryPlan.scala | 22 -- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 242c799dd22..a33069051d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -84,7 +84,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { treePatternbits.containsPattern(CURRENT_LIKE) } -plan.transformDownWithSubqueries(transformCondition) { +plan.transformDownWithSubqueriesAndPruning(transformCondition) { case subQuery => subQuery.transformAllExpressionsWithPruning(transformCondition) { case cd: CurrentDate => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index d0283f4d367..cc62c81b101 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -454,7 +454,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * to rewrite the whole plan, include its subqueries, in one go. 
*/ def transformWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = -transformDownWithSubqueries(AlwaysProcess.fn, UnknownRuleId)(f) +transformDownWithSubqueries(f) /** * Returns a copy of this node where the given partial function has been recursively applied @@ -479,10 +479,20 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * first to this node, then this node's subqueries and finally this node's children. * When the partial function does not apply to a given node, it is left unchanged. */ - def transformDownWithSubqueries( -cond: TreePatternBits => Boolean = AlwaysProcess.fn, ruleId: RuleId = UnknownRuleId) -(f: PartialFunction[PlanType, PlanType]) -: PlanType = { + def transformDownWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = { +transformDownWithSubqueriesAndPruning(AlwaysProcess.fn, UnknownRuleId)(f) + } + + /** + * This meth
[spark] branch master updated: [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new eda6c4b9987 [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries eda6c4b9987 is described below commit eda6c4b9987f0515cb0aae4686c8a0ae0a3987d4 Author: Josh Rosen AuthorDate: Sat Jun 4 09:12:42 2022 +0300 [SPARK-39259][SQL][FOLLOWUP] Fix source and binary incompatibilities in transformDownWithSubqueries ### What changes were proposed in this pull request? This is a followup to #36654. That PR modified the existing `QueryPlan.transformDownWithSubqueries` to add additional arguments for tree pattern pruning. In this PR, I roll back the change to that method's signature and instead add a new `transformDownWithSubqueriesAndPruning` method. ### Why are the changes needed? The original change breaks binary and source compatibility in Catalyst. Technically speaking, Catalyst APIs are considered internal to Spark and are subject to change between minor releases (see [source](https://github.com/apache/spark/blob/bb51add5c79558df863d37965603387d40cc4387/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/package.scala#L20-L24)), but I think it's nice to try to avoid API breakage when possible. While trying to compile some custom Catalyst code, I ran into issues when trying to call the `transformDownWithSubqueries` method without supplying a tree pattern filter condition. If I do `transformDownWithSubqueries() { f} ` then I get a compilation error. I think this is due to the first parameter group containing all default parameters. My PR's solution of adding a new `transformDownWithSubqueriesAndPruning` method solves this problem. It's also more consistent with the naming convention used for other pruning-enabled tree transformation methods. 
### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing tests. Closes #36765 from JoshRosen/SPARK-39259-binary-compatibility-followup. Authored-by: Josh Rosen Signed-off-by: Max Gekk --- .../sql/catalyst/optimizer/finishAnalysis.scala| 2 +- .../spark/sql/catalyst/plans/QueryPlan.scala | 22 -- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index 242c799dd22..a33069051d9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -84,7 +84,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] { treePatternbits.containsPattern(CURRENT_LIKE) } -plan.transformDownWithSubqueries(transformCondition) { +plan.transformDownWithSubqueriesAndPruning(transformCondition) { case subQuery => subQuery.transformAllExpressionsWithPruning(transformCondition) { case cd: CurrentDate => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index d0283f4d367..cc62c81b101 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -454,7 +454,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * to rewrite the whole plan, include its subqueries, in one go. 
*/ def transformWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = -transformDownWithSubqueries(AlwaysProcess.fn, UnknownRuleId)(f) +transformDownWithSubqueries(f) /** * Returns a copy of this node where the given partial function has been recursively applied @@ -479,10 +479,20 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] * first to this node, then this node's subqueries and finally this node's children. * When the partial function does not apply to a given node, it is left unchanged. */ - def transformDownWithSubqueries( -cond: TreePatternBits => Boolean = AlwaysProcess.fn, ruleId: RuleId = UnknownRuleId) -(f: PartialFunction[PlanType, PlanType]) -: PlanType = { + def transformDownWithSubqueries(f: PartialFunction[PlanType, PlanType]): PlanType = { +transformDownWithSubqueriesAndPruning(AlwaysProcess.fn, UnknownRuleId)(f) + } + + /** + * This method is the top-down (pre-order) counterpart of transformUpWithSubqueries. + * Returns a copy of this n
svn commit: r54845 - in /dev/spark/v3.3.0-rc4-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.0/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: maxgekk Date: Fri Jun 3 12:28:47 2022 New Revision: 54845 Log: Apache Spark v3.3.0-rc4 docs [This commit notification would consist of 2665 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54843 - /dev/spark/v3.3.0-rc4-bin/
Author: maxgekk Date: Fri Jun 3 11:54:40 2022 New Revision: 54843 Log: Apache Spark v3.3.0-rc4 Added: dev/spark/v3.3.0-rc4-bin/ dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc4-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc4-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc4-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc4-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.asc Fri Jun 3 11:54:40 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKZ9hwTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q02g6EADly9nJXABQs9frXWjgUexvm5TY6+lY +mbUg3K+faPfljt1NKRjqzkue5ePMm6zm2x2Sj33Rco9iIGQk8H3BKc+6IIOreknJ +bgGBmZ/ffo7NM2RlReVTKUuVllrFtmXECznG+o4K2w8HrOr498KtXQ2eE33XKG2h +SzDhMyn6VIIal2FDwc63Edyh2CV89wQpHOFhrhMQbhBziV/IQ5d4ggrbMB+WOVQi +IK5l0PqUEB+8LYODMC2F5OVt8p0VRr8OOv5YzA6/3Dca5hKHElbDqDgU0KVFQR2d +03CHh3DmQP7QDfsGN4z+w/VbXu9oBLPeCd4N8mxIRwReqJUuGYrkpgOa1X+5wPKN +NfR4LBnde7MiBWaonKl/UtvyuYqjA1bxIi/Ff0juhzpWkffLz/dB434HqJe2wArA +B/wjzcYKkcMt+402si0/B00rjGS2bC8tuTnQbppr1Ln+7i9qDrX0WBzaqSeHAR2l +J9dwPrGf0w0XPni0fqM3+tZyIkIxWCjhBT4OgBYX/yT3EyBj3KRTjVkpJ3In/fpe +YD90gZGKR8/YdU0cbnKA6oV9vC3aH8fXUC8gM74cot9OLvczBTYG1GwLVh86e7VG +qMBcNSxJabiK0uEI2mt09eXrAINxAlw+1vi2NM0ZuAZ0j5pi/SZu23QIiSu8FiIt +AaoHVlpVgkCL+g== +=tqAA +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc4-bin/SparkR_3.3.0.tar.gz.sha512 Fri Jun 3 11:54:40 2022 @@ -0,0 +1 @@ +c53dcb750d9c7ace040b9c6a11661aaea3bdd0500b0da688521fb6a0989ad95dba82655b2c523fbcb6ded11f9c2c81542263fff4d7e28f1e06e7e697c0299bc4 SparkR_3.3.0.tar.gz Added: dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc4-bin/pyspark-3.3.0.tar.gz.asc Fri Jun 3 11:54:40 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKZ9h4THG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q02biEACsBNascO1EuZR/a4+rjpSP5RVUC6KD +8GL8oXtB6KKMy4wUlYPj3xODg3AI7L/9+OQ3lAUpSTgUBr3RvzKEgyhxUYSyTdx4 +CIv7r1ft1NDgYA59sreFu2YuKMY6CsyP9Ze6KSHG2zWxAps9VPN/Ar9dzGUFFC22 +0MdZVXmnl3Ea2KXrxCPINH6p1xANbmQA+G3gLX73oT3z1jCzwbSxubWhj6Yw55YQ +sMIvWT/4IIkYldEDaGVmZWCAQ/UyCXiLRraymmG2DQVhAeoHxGo5jxdggnRLlSqW +0J5PWmtNUHjj9g9pFjbm76x4BJLUGuLptnumvbkqYgh5X6h+OKBWMw5ceIpMR2/f +vPRGa9y1Bk0WluNeN3IIsMe7UuFoJBIuCeOi8UmTbVGoV+naY5psSMtJPylQ8mJR +c8nY8gXCWeMCWxokNQQIWxXZpRMwWlojoV2AmRUR+nYG+roebyhI3H4rU6SiVXlP +vae+kIjPQCILPqEwRlCa+vfqj9ukfE0AmusnGhN3/Mc0qOTtkOqRVd2+KHpF+i4C +JnXqqJhtg4KUCsLqey3gUJsjXgTAHIXxISYWzPWQYBrKBnXBA0/GP1+cow9vTeuB +TzmirWfaVBv4DkSoWzQ0q8ils3aKsiML07VSyhcVCTWQcoLJ+WR8z3kV+a0vYr5j +oY4OgV1u6UmElA== +=n+my +-END PGP SIGNATURE- Added: dev/spark
[spark] 01/01: Preparing development version 3.3.1-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 03012f432ac24049291c71415a32677f612a7afd Author: Maxim Gekk AuthorDate: Fri Jun 3 09:20:38 2022 + Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87..0e449e841cf 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b..d12f2ad73fa 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e..842d63f5d38 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d75..f7d187bf952 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e7747..53f38df8851 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e..845f6659407 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370a..8e159089193 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3..1987c133285 100644 --- a
[spark] branch branch-3.3 updated (61d22b6f313 -> 03012f432ac)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 61d22b6f313 [SPARK-39371][DOCS][CORE] Review and fix issues in Scala/Java API docs of Core module add 4e3599bc11a Preparing Spark release v3.3.0-rc4 new 03012f432ac Preparing development version 3.3.1-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] tag v3.3.0-rc4 created (now 4e3599bc11a)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to tag v3.3.0-rc4 in repository https://gitbox.apache.org/repos/asf/spark.git at 4e3599bc11a (commit) This tag includes the following new commits: new 4e3599bc11a Preparing Spark release v3.3.0-rc4 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing Spark release v3.3.0-rc4
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to tag v3.3.0-rc4 in repository https://gitbox.apache.org/repos/asf/spark.git commit 4e3599bc11a1cb0ea9fc819e7f752d2228e54baf Author: Maxim Gekk AuthorDate: Fri Jun 3 09:20:31 2022 + Preparing Spark release v3.3.0-rc4 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf..9479bb3bf87 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fa..2e9c4d9960b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d38..2a9acfa335e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf952..7b17e625d75 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df8851..c5c920e7747 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407..697b5a3928e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e159089193..ad2db11370a 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c133285..1a7bdee70f3 100644 --- a/common/tags
[spark] branch master updated (9e6f2dd7268 -> 873ad5596b5)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 9e6f2dd7268 [SPARK-39320][SQL] Support aggregate function `MEDIAN` add 873ad5596b5 [SPARK-37623][SQL] Support ANSI Aggregate Function: regr_intercept No new revisions were added by this update. Summary of changes: .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/aggregate/Covariance.scala | 4 +- .../expressions/aggregate/linearRegression.scala | 57 +- .../aggregate/AggregateExpressionSuite.scala | 17 +++ .../sql-functions/sql-expression-schema.md | 1 + .../sql-tests/inputs/linear-regression.sql | 6 +++ .../inputs/postgreSQL/aggregates_part1.sql | 2 +- .../inputs/udf/postgreSQL/udf-aggregates_part1.sql | 2 +- .../sql-tests/results/linear-regression.sql.out| 35 - .../results/postgreSQL/aggregates_part1.sql.out| 10 +++- .../udf/postgreSQL/udf-aggregates_part1.sql.out| 10 +++- 11 files changed, 136 insertions(+), 9 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-39259][SQL][3.3] Evaluate timestamps consistently in subqueries
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 4a0f0ff6c22 [SPARK-39259][SQL][3.3] Evaluate timestamps consistently in subqueries 4a0f0ff6c22 is described below commit 4a0f0ff6c22b85cb0fc1eef842da8dbe4c90543a Author: Ole Sasse AuthorDate: Fri Jun 3 09:12:26 2022 +0300 [SPARK-39259][SQL][3.3] Evaluate timestamps consistently in subqueries ### What changes were proposed in this pull request? Apply the optimizer rule ComputeCurrentTime consistently across subqueries. This is a backport of https://github.com/apache/spark/pull/36654. ### Why are the changes needed? At the moment timestamp functions like now() can return different values within a query if subqueries are involved ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A new unit test was added Closes #36752 from olaky/SPARK-39259-spark_3_3. 
Authored-by: Ole Sasse Signed-off-by: Max Gekk --- .../sql/catalyst/optimizer/finishAnalysis.scala| 41 +- .../spark/sql/catalyst/plans/QueryPlan.scala | 11 ++- .../optimizer/ComputeCurrentTimeSuite.scala| 89 -- 3 files changed, 95 insertions(+), 46 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index ef9c4b9af40..242c799dd22 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -17,14 +17,16 @@ package org.apache.spark.sql.catalyst.optimizer -import scala.collection.mutable +import java.time.{Instant, LocalDateTime} import org.apache.spark.sql.catalyst.CurrentUserContext.CURRENT_USER import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.catalyst.trees.TreePattern._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ} +import org.apache.spark.sql.catalyst.trees.TreePatternBits +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, instantToMicros, localDateTimeToMicros} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -73,29 +75,30 @@ object RewriteNonCorrelatedExists extends Rule[LogicalPlan] { */ object ComputeCurrentTime extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { -val currentDates = mutable.Map.empty[String, Literal] -val timeExpr = CurrentTimestamp() -val timestamp = timeExpr.eval(EmptyRow).asInstanceOf[Long] -val 
currentTime = Literal.create(timestamp, timeExpr.dataType) +val instant = Instant.now() +val currentTimestampMicros = instantToMicros(instant) +val currentTime = Literal.create(currentTimestampMicros, TimestampType) val timezone = Literal.create(conf.sessionLocalTimeZone, StringType) -val localTimestamps = mutable.Map.empty[String, Literal] -plan.transformAllExpressionsWithPruning(_.containsPattern(CURRENT_LIKE)) { - case currentDate @ CurrentDate(Some(timeZoneId)) => -currentDates.getOrElseUpdate(timeZoneId, { - Literal.create(currentDate.eval().asInstanceOf[Int], DateType) -}) - case CurrentTimestamp() | Now() => currentTime - case CurrentTimeZone() => timezone - case localTimestamp @ LocalTimestamp(Some(timeZoneId)) => -localTimestamps.getOrElseUpdate(timeZoneId, { - Literal.create(localTimestamp.eval().asInstanceOf[Long], TimestampNTZType) -}) +def transformCondition(treePatternbits: TreePatternBits): Boolean = { + treePatternbits.containsPattern(CURRENT_LIKE) +} + +plan.transformDownWithSubqueries(transformCondition) { + case subQuery => +subQuery.transformAllExpressionsWithPruning(transformCondition) { + case cd: CurrentDate => +Literal.create(DateTimeUtils.microsToDays(currentTimestampMicros, cd.zoneId), DateType) + case CurrentTimestamp() | Now() => currentTime + case CurrentTimeZone() => timezone + case localTimestamp: LocalTimestamp => +val asDateTime = LocalDateTime.ofInstant(instant, localTimestamp.zoneId) +Literal.create(localDateTimeToMicros(as
[spark] branch master updated: [SPARK-39320][SQL] Support aggregate function `MEDIAN`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 9e6f2dd7268 [SPARK-39320][SQL] Support aggregate function `MEDIAN` 9e6f2dd7268 is described below commit 9e6f2dd72686a9ac44fd4573b5a408f8a8e55fe1 Author: Jiaan Geng AuthorDate: Fri Jun 3 08:23:22 2022 +0300 [SPARK-39320][SQL] Support aggregate function `MEDIAN` ### What changes were proposed in this pull request? Many mainstream database supports aggregate function `MEDIAN`. **Syntax:** Aggregate function `MEDIAN( )` Window function `MEDIAN( ) OVER ( [ PARTITION BY ] )` **Arguments:** expr: The expression must evaluate to a numeric data type (INTEGER, FLOAT, DECIMAL, or equivalent). **Examples**: ``` select k, median(v) from aggr group by k order by k; +---+---+ | K | MEDIAN(V) | |---+---| | 1 | 20.0 | | 2 | 22.5 | | 3 | NULL | +---+---+ ``` ### Why are the changes needed? 
The mainstream database supports `MEDIAN` show below: **Snowflake** https://docs.snowflake.com/en/sql-reference/functions/median.html **Oracle** https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/MEDIAN.html#GUID-DE15705A-AC18-4416-8487-B9E1D70CE01A **ClickHouse** https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/median **Redshift** https://docs.aws.amazon.com/redshift/latest/dg/r_MEDIAN.html **Teradata** https://docs.teradata.com/r/Teradata-VantageTM-SQL-Functions-Expressions-and-Predicates/March-2019/Ordered-Analytical/Window-Aggregate-Functions/MEDIAN **DB2** https://www.ibm.com/docs/en/db2/11.5?topic=functions-median **Vertica** https://www.vertica.com/docs/9.2.x/HTML/Content/Authoring/SQLReferenceManual/Functions/Analytic/MEDIANAnalytic.htm?tocpath=SQL%20Reference%20Manual%7CSQL%20Functions%7CAnalytic%20Functions%7C_20 **H2** http://www.h2database.com/html/functions-aggregate.html#median **Sybase** https://infocenter.sybase.com/help/index.jsp?topic=/com.sybase.infocenter.dc01776.1601/doc/html/san1278453109663.html **Exasol** https://docs.exasol.com/db/latest/sql_references/functions/alphabeticallistfunctions/median.htm **Yellowbrick** https://www.yellowbrick.com/docs/5.2/ybd_sqlref/median.html **Mariadb** https://mariadb.com/kb/en/median/ **Singlestore** https://docs.singlestore.com/db/v7.6/en/reference/sql-reference/aggregate-functions/median.html **InfluxDB** https://docs.influxdata.com/flux/v0.x/stdlib/universe/median/ ### Does this PR introduce _any_ user-facing change? 'No'. New feature. ### How was this patch tested? New tests. Closes #36714 from beliefer/SPARK-39320. 
Authored-by: Jiaan Geng Signed-off-by: Max Gekk --- .../sql/catalyst/analysis/CheckAnalysis.scala | 5 +- .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/aggregate/percentiles.scala| 26 +- .../sql-functions/sql-expression-schema.md | 1 + .../test/resources/sql-tests/inputs/group-by.sql | 29 -- .../resources/sql-tests/inputs/percentiles.sql | 212 +++ .../src/test/resources/sql-tests/inputs/window.sql | 112 -- .../resources/sql-tests/results/group-by.sql.out | 70 +--- .../sql-tests/results/percentiles.sql.out | 417 + .../resources/sql-tests/results/window.sql.out | 229 +-- 10 files changed, 661 insertions(+), 441 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index ed2e9ba2b6b..7635918279a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -21,7 +21,7 @@ import scala.collection.mutable import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.SubExprUtils._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, PercentileCont, PercentileDisc} +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Median, PercentileCont, PercentileDisc} import org.apache.spark.sql.catalyst.optimizer.{BooleanSimplification, DecorrelateInnerQuery, InlineCTE} import org.apache.spark.sql.catalyst.plans._ import org.apache.spark.sql.catalyst.plans.logical._ @@ -243,7 +243,8 @@ trait CheckAnalysis extends PredicateH
[spark] branch branch-3.3 updated (4da8f3a76b1 -> bc4aab5c26d)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 4da8f3a76b1 [SPARK-39361] Don't use Log4J2's extended throwable conversion pattern in default logging configurations add bc4aab5c26d [SPARK-39295][DOCS][PYTHON][3.3] Improve documentation of pandas API supported list No new revisions were added by this update. Summary of changes: .../pandas_on_spark/supported_pandas_api.rst | 62 +++--- 1 file changed, 30 insertions(+), 32 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39259][SQL] Evaluate timestamps consistently in subqueries
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 52e2717c2d1 [SPARK-39259][SQL] Evaluate timestamps consistently in subqueries 52e2717c2d1 is described below commit 52e2717c2d1b6e1f449de5714b6e202074bac26f Author: Ole Sasse AuthorDate: Thu Jun 2 21:42:10 2022 +0300 [SPARK-39259][SQL] Evaluate timestamps consistently in subqueries ### What changes were proposed in this pull request? Apply the optimizer rule ComputeCurrentTime consistently across subqueries ### Why are the changes needed? At the moment timestamp functions like now() can return different values within a query if subqueries are involved ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? A new unit test was added Closes #36654 from olaky/SPARK-39259. Authored-by: Ole Sasse Signed-off-by: Max Gekk --- .../sql/catalyst/optimizer/finishAnalysis.scala| 41 +- .../spark/sql/catalyst/plans/QueryPlan.scala | 11 ++- .../optimizer/ComputeCurrentTimeSuite.scala| 89 -- 3 files changed, 95 insertions(+), 46 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala index ef9c4b9af40..242c799dd22 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/finishAnalysis.scala @@ -17,14 +17,16 @@ package org.apache.spark.sql.catalyst.optimizer -import scala.collection.mutable +import java.time.{Instant, LocalDateTime} import org.apache.spark.sql.catalyst.CurrentUserContext.CURRENT_USER import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ 
import org.apache.spark.sql.catalyst.trees.TreePattern._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ} +import org.apache.spark.sql.catalyst.trees.TreePatternBits +import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.DateTimeUtils.{convertSpecialDate, convertSpecialTimestamp, convertSpecialTimestampNTZ, instantToMicros, localDateTimeToMicros} import org.apache.spark.sql.connector.catalog.CatalogManager import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -73,29 +75,30 @@ object RewriteNonCorrelatedExists extends Rule[LogicalPlan] { */ object ComputeCurrentTime extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = { -val currentDates = mutable.Map.empty[String, Literal] -val timeExpr = CurrentTimestamp() -val timestamp = timeExpr.eval(EmptyRow).asInstanceOf[Long] -val currentTime = Literal.create(timestamp, timeExpr.dataType) +val instant = Instant.now() +val currentTimestampMicros = instantToMicros(instant) +val currentTime = Literal.create(currentTimestampMicros, TimestampType) val timezone = Literal.create(conf.sessionLocalTimeZone, StringType) -val localTimestamps = mutable.Map.empty[String, Literal] -plan.transformAllExpressionsWithPruning(_.containsPattern(CURRENT_LIKE)) { - case currentDate @ CurrentDate(Some(timeZoneId)) => -currentDates.getOrElseUpdate(timeZoneId, { - Literal.create(currentDate.eval().asInstanceOf[Int], DateType) -}) - case CurrentTimestamp() | Now() => currentTime - case CurrentTimeZone() => timezone - case localTimestamp @ LocalTimestamp(Some(timeZoneId)) => -localTimestamps.getOrElseUpdate(timeZoneId, { - Literal.create(localTimestamp.eval().asInstanceOf[Long], TimestampNTZType) -}) +def transformCondition(treePatternbits: TreePatternBits): Boolean = { + treePatternbits.containsPattern(CURRENT_LIKE) +} + +plan.transformDownWithSubqueries(transformCondition) { + case 
subQuery => +subQuery.transformAllExpressionsWithPruning(transformCondition) { + case cd: CurrentDate => +Literal.create(DateTimeUtils.microsToDays(currentTimestampMicros, cd.zoneId), DateType) + case CurrentTimestamp() | Now() => currentTime + case CurrentTimeZone() => timezone + case localTimestamp: LocalTimestamp => +val asDateTime = LocalDateTime.ofInstant(instant, localTimestamp.zoneId) +Literal.create(localDateTimeToMicros(asDateTime), TimestampNTZType) +} } } } - /** * Replaces the expression of CurrentDatabase with the current data
[spark] branch branch-3.3 updated: [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new ef521d30a3b [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time ef521d30a3b is described below commit ef521d30a3b023213bbc3076911a93c0c0c425dc Author: yangjie01 AuthorDate: Thu Jun 2 13:06:14 2022 +0300 [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time ### What changes were proposed in this pull request? After SPARK-38118, `dataTypeMismatchError` related to `Filter` will be checked and throw in `RemoveTempResolvedColumn`, this will cause compatibility issue with exception message presentation. For example, the following case: ``` spark.sql("create table t1(user_id int, auct_end_dt date) using parquet;") spark.sql("select * from t1 join t2 on t1.user_id = t2.user_id where t1.auct_end_dt >= Date_sub('2020-12-27', 90)").show ``` The expected message is ``` Table or view not found: t2 ``` But the actual message is ``` org.apache.spark.sql.AnalysisException: cannot resolve 'date_sub('2020-12-27', 90)' due to data type mismatch: argument 1 requires date type, however, ''2020-12-27'' is of string type.; line 1 pos 76 ``` For forward compatibility, this pr change to only records `DATA_TYPE_MISMATCH_ERROR_MESSAGE` in the `RemoveTempResolvedColumn` check process , and move `failAnalysis` to `CheckAnalysis#checkAnalysis` ### Why are the changes needed? Fix analysis exception message compatibility. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions and add a new test case Closes #36746 from LuciferYang/SPARK-39354. 
Authored-by: yangjie01 Signed-off-by: Max Gekk (cherry picked from commit 89fdb8a6fb6a669c458891b3abeba236e64b1e89) Signed-off-by: Max Gekk --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 ++- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 17 - .../spark/sql/catalyst/analysis/AnalysisSuite.scala | 16 ++-- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ba492e58f6e..51c1d1f768f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -28,7 +28,7 @@ import scala.util.{Failure, Random, Success, Try} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR} +import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.DATA_TYPE_MISMATCH_ERROR_MESSAGE import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _} @@ -4328,10 +4328,7 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] { case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure => e.checkInputDataTypes() match { case TypeCheckResult.TypeCheckFailure(message) => -e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true) -e.failAnalysis( - s"cannot resolve '${e.sql}' due to data type mismatch: $message" + -extraHintForAnsiTypeCoercionExpression(plan)) +e.setTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE, message) } case _ => }) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 9c72b9974c4..b0d1d6c2a30 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -50,6 +50,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { val DATA_TYPE_MISMATCH_ERROR = TreeNodeTag[Boolean]("dataTypeMismatchError") + val DATA_TYPE_MISMATCH_ERROR_MESSAGE = TreeNodeTag[String]("dataTypeMismatchError") + protected def
[spark] branch master updated: [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 89fdb8a6fb6 [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time 89fdb8a6fb6 is described below commit 89fdb8a6fb6a669c458891b3abeba236e64b1e89 Author: yangjie01 AuthorDate: Thu Jun 2 13:06:14 2022 +0300 [SPARK-39354][SQL] Ensure show `Table or view not found` even if there are `dataTypeMismatchError` related to `Filter` at the same time ### What changes were proposed in this pull request? After SPARK-38118, `dataTypeMismatchError` related to `Filter` will be checked and throw in `RemoveTempResolvedColumn`, this will cause compatibility issue with exception message presentation. For example, the following case: ``` spark.sql("create table t1(user_id int, auct_end_dt date) using parquet;") spark.sql("select * from t1 join t2 on t1.user_id = t2.user_id where t1.auct_end_dt >= Date_sub('2020-12-27', 90)").show ``` The expected message is ``` Table or view not found: t2 ``` But the actual message is ``` org.apache.spark.sql.AnalysisException: cannot resolve 'date_sub('2020-12-27', 90)' due to data type mismatch: argument 1 requires date type, however, ''2020-12-27'' is of string type.; line 1 pos 76 ``` For forward compatibility, this pr change to only records `DATA_TYPE_MISMATCH_ERROR_MESSAGE` in the `RemoveTempResolvedColumn` check process , and move `failAnalysis` to `CheckAnalysis#checkAnalysis` ### Why are the changes needed? Fix analysis exception message compatibility. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions and add a new test case Closes #36746 from LuciferYang/SPARK-39354. 
Authored-by: yangjie01 Signed-off-by: Max Gekk --- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 ++- .../spark/sql/catalyst/analysis/CheckAnalysis.scala | 17 - .../spark/sql/catalyst/analysis/AnalysisSuite.scala | 16 ++-- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b13dede2acc..3017fc10dfd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -28,7 +28,7 @@ import scala.util.{Failure, Random, Success, Try} import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst._ -import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.{extraHintForAnsiTypeCoercionExpression, DATA_TYPE_MISMATCH_ERROR} +import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer.DATA_TYPE_MISMATCH_ERROR_MESSAGE import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.catalyst.encoders.OuterScopes import org.apache.spark.sql.catalyst.expressions.{Expression, FrameLessOffsetWindowFunction, _} @@ -4361,10 +4361,7 @@ object RemoveTempResolvedColumn extends Rule[LogicalPlan] { case e: Expression if e.childrenResolved && e.checkInputDataTypes().isFailure => e.checkInputDataTypes() match { case TypeCheckResult.TypeCheckFailure(message) => -e.setTagValue(DATA_TYPE_MISMATCH_ERROR, true) -e.failAnalysis( - s"cannot resolve '${e.sql}' due to data type mismatch: $message" + -extraHintForAnsiTypeCoercionExpression(plan)) +e.setTagValue(DATA_TYPE_MISMATCH_ERROR_MESSAGE, message) } case _ => }) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 95b0226f00d..ed2e9ba2b6b 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -50,6 +50,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { val DATA_TYPE_MISMATCH_ERROR = TreeNodeTag[Boolean]("dataTypeMismatchError") + val DATA_TYPE_MISMATCH_ERROR_MESSAGE = TreeNodeTag[String]("dataTypeMismatchError") + protected def failAnalysis(msg: String): Nothing = { throw new AnalysisException(msg) } @@ -174,7 +176,20 @@ trait Ch
[spark] branch branch-3.3 updated (4bbaf3777e9 -> fef569507bc)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 4bbaf3777e9 [SPARK-38675][CORE] Fix race during unlock in BlockInfoManager add fef569507bc [SPARK-39346][SQL][3.3] Convert asserts/illegal state exception to internal errors on each phase No new revisions were added by this update. Summary of changes: .../sql/kafka010/KafkaMicroBatchSourceSuite.scala | 11 +--- .../main/scala/org/apache/spark/sql/Dataset.scala | 14 +++--- .../spark/sql/execution/QueryExecution.scala | 31 +- .../sql/execution/streaming/StreamExecution.scala | 4 ++- .../streaming/MicroBatchExecutionSuite.scala | 6 +++-- .../sql/streaming/continuous/ContinuousSuite.scala | 7 ++--- 6 files changed, 51 insertions(+), 22 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39346][SQL] Convert asserts/illegal state exception to internal errors on each phase
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 8894e785eda [SPARK-39346][SQL] Convert asserts/illegal state exception to internal errors on each phase 8894e785eda is described below commit 8894e785edae42a642351ad91e539324c39da8e4 Author: Max Gekk AuthorDate: Wed Jun 1 20:16:17 2022 +0300 [SPARK-39346][SQL] Convert asserts/illegal state exception to internal errors on each phase ### What changes were proposed in this pull request? In the PR, I propose to catch asserts/illegal state exception on each phase of query execution: ANALYSIS, OPTIMIZATION, PLANNING, and convert them to a SparkException w/ the `INTERNAL_ERROR` error class. ### Why are the changes needed? To improve user experience with Spark SQL and unify representation of user-facing errors. ### Does this PR introduce _any_ user-facing change? No. The changes might affect users in corner cases only. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *KafkaMicroBatchV1SourceSuite" $ build/sbt "test:testOnly *KafkaMicroBatchV2SourceSuite" ``` Closes #36704 from MaxGekk/wrapby-INTERNAL_ERROR-every-phase. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../sql/kafka010/KafkaMicroBatchSourceSuite.scala | 11 +--- .../main/scala/org/apache/spark/sql/Dataset.scala | 14 +++--- .../spark/sql/execution/QueryExecution.scala | 31 +- .../sql/execution/streaming/StreamExecution.scala | 4 ++- .../streaming/MicroBatchExecutionSuite.scala | 6 +++-- .../sql/streaming/continuous/ContinuousSuite.scala | 7 ++--- 6 files changed, 51 insertions(+), 22 deletions(-) diff --git a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala index 2396f31b954..0a32b1b54d0 100644 --- a/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala +++ b/connector/kafka-0-10-sql/src/test/scala/org/apache/spark/sql/kafka010/KafkaMicroBatchSourceSuite.scala @@ -34,6 +34,7 @@ import org.apache.kafka.common.TopicPartition import org.scalatest.concurrent.PatienceConfiguration.Timeout import org.scalatest.time.SpanSugar._ +import org.apache.spark.{SparkException, SparkThrowable} import org.apache.spark.sql.{Dataset, ForeachWriter, Row, SparkSession} import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap import org.apache.spark.sql.connector.read.streaming.SparkDataStream @@ -666,9 +667,10 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { testUtils.sendMessages(topic2, Array("6")) }, StartStream(), - ExpectFailure[IllegalStateException](e => { + ExpectFailure[SparkException](e => { +assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") // The offset of `topic2` should be changed from 2 to 1 -assert(e.getMessage.contains("was changed from 2 to 1")) +assert(e.getCause.getMessage.contains("was changed from 2 to 1")) }) ) } @@ -764,12 +766,13 @@ abstract class KafkaMicroBatchSourceSuiteBase extends KafkaSourceSuiteBase { testStream(df)( 
StartStream(checkpointLocation = metadataPath.getAbsolutePath), -ExpectFailure[IllegalStateException](e => { +ExpectFailure[SparkException](e => { + assert(e.asInstanceOf[SparkThrowable].getErrorClass === "INTERNAL_ERROR") Seq( s"maximum supported log version is v1, but encountered v9", "produced by a newer version of Spark and cannot be read by this version" ).foreach { message => -assert(e.toString.contains(message)) +assert(e.getCause.toString.contains(message)) } })) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index f00ebf51d6d..0a45cf92c6e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils -import org.apache.spark.{SparkException, SparkThrowable, TaskContext} +import org.apache.spark.TaskContext import org.apache.spark.annotation.{DeveloperApi, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.function._ @@ -3920,19 +3920,11 @@ class Dataset[
[spark] branch master updated (6d8efb515f6 -> 1e194d26ead)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 6d8efb515f6 [SPARK-39308][BUILD][SQL] Upgrade parquet to 1.12.3 add 1e194d26ead [SPARK-39305][SQL] Add the `EQUAL_NULL()` function No new revisions were added by this update. Summary of changes: .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../sql/catalyst/expressions/predicates.scala | 38 ++ .../sql-functions/sql-expression-schema.md | 1 + .../resources/sql-tests/inputs/null-handling.sql | 3 ++ .../sql-tests/results/null-handling.sql.out| 16 - 5 files changed, 58 insertions(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated (37a2416ca4c -> 6c4e07dbe38)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 37a2416ca4c [SPARK-39252][PYSPARK][TESTS] Remove flaky test_df_is_empty add 6c4e07dbe38 [SPARK-39255][SQL][3.3] Improve error messages No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 12 ++-- .../spark/sql/errors/QueryCompilationErrors.scala | 4 +- .../apache/spark/sql/errors/QueryErrorsBase.scala | 10 +++- .../spark/sql/errors/QueryExecutionErrors.scala| 2 +- .../apache/spark/sql/types/StructTypeSuite.scala | 22 --- .../resources/sql-tests/results/ansi/cast.sql.out | 68 +++--- .../resources/sql-tests/results/ansi/date.sql.out | 6 +- .../results/ansi/datetime-parsing-invalid.sql.out | 4 +- .../sql-tests/results/ansi/interval.sql.out| 20 +++ .../results/ansi/string-functions.sql.out | 8 +-- .../test/resources/sql-tests/results/pivot.sql.out | 2 +- .../sql-tests/results/postgreSQL/boolean.sql.out | 32 +- .../sql-tests/results/postgreSQL/float4.sql.out| 8 +-- .../sql-tests/results/postgreSQL/float8.sql.out| 8 +-- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out| 2 +- .../results/postgreSQL/window_part3.sql.out| 2 +- .../results/postgreSQL/window_part4.sql.out| 2 +- .../results/timestampNTZ/timestamp-ansi.sql.out| 2 +- .../sql-tests/results/udf/udf-pivot.sql.out| 2 +- .../spark/sql/connector/InsertIntoTests.scala | 4 +- 21 files changed, 117 insertions(+), 107 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54711 - in /dev/spark/v3.3.0-rc3-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.0/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: maxgekk Date: Tue May 24 14:09:25 2022 New Revision: 54711 Log: Apache Spark v3.3.0-rc3 docs [This commit notification would consist of 2650 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54708 - /dev/spark/v3.3.0-rc3-bin/
Author: maxgekk Date: Tue May 24 13:31:55 2022 New Revision: 54708 Log: Apache Spark v3.3.0-rc3 Added: dev/spark/v3.3.0-rc3-bin/ dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc3-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc3-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc3-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc3-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.asc Tue May 24 13:31:55 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKM3eYTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q08vEEADS36LrbaBB+bJomtt8GbqFzUXqTcwx +FLoymqs2ObRI9zKkY1w7QkIOWdssxlQQQuwKc0sFu3i77YTjLrcmRaxa/t5zvwmf +2fIcOqu1xFVhIUJbJ/IhLpGK1KlnbgQi2l+0iYLrB9u/VFceZmwGdLu/GBrnJ/e4 +3mNIOKGnPkMPhJi2eKPMLg161S5YMgBgcosRCCBeaxj37sR4RKQnJyYoo5mAE39B +yd1jcT8Q7KqJI6mLTI4d7zg8djnCn/2ZPFcrgfCnKZz4g1hoXVEzyF1xxg8vHq1B +7TDulhbBqzNABQDlKTe0xLUA0fW+0NiDy+ZG61TlqoZuBXGO0rSju2V9mnux6qw/ +hfHOuCh6pM4BG4694kV989UUt5YnVSNUyLSC5XHSQsqTgVydREtj0ETNQUSiN70y +qenMW4gtLEOLWgRsc7Lu0g7IsPgP43kh2llL8vOkXfQVVD2L73vWT6V1iTVFkpT3 +oW0AQ9fdiFgbT4q7nEcfxUm/uDFlzgSxD70QrV8oe8aZRCUeogp1cCZQedZEM+V8 +1qKCMM/5zVCeyrpRZpO9DJdKsIskpM6mIZElOvvo3EHZnf5FDt43KNtFXg+ogWWf +xiQoERy08pQKK+rgAMJZjA/6wL/SrDebmYrXD8WNBx54G1ZLaUurcWZqkVAZ3+Ts +lPEabYDmVZKovA== +=PIrx +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc3-bin/SparkR_3.3.0.tar.gz.sha512 Tue May 24 13:31:55 2022 @@ -0,0 +1 @@ +5a6b1460f360dd505009f4fac85f53c7e5e312c116734a4838713d420f74bff516799ca823c2f100f451aa80bc931d490a18b7e9c290b598dd0ef3e26e05f184 SparkR_3.3.0.tar.gz Added: dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc3-bin/pyspark-3.3.0.tar.gz.asc Tue May 24 13:31:55 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKM3egTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q015wD/0UETD5HcWbKnScxbXEz8RvgCBFTmQs +weSyJk5VLRW4qXXG7Q5zALwY3GPemJC9aSALhKwUGOVMGcihQBC7THTohaaPgKAw +20pgaX4iBiSOoRaJANqxcNz1CceK/VKqH58YFlYpEbOJw0r8boCkd/pPXan2myl0 +w5hb+lE7Cw9DL8tl4W2IPmo4iNP4dI+u46yxd7Yj4W3I+jM2dHWHxIJiJl66nvDd +6EKmvYRqGEBfcwGES6CrngIyQD4lylJ/FORQE1vVZ0TiAaN+Hqn7k5mr9FETtl8L +HDFZRro+REB9Xz2h7d31ywWqvMnqWQ3VnKSeVBLLGfMcYP2pHxf2DJiCXaAZAtIy +RdbnxAK4wjZM3Qe0bJY6Wm0H8lJ8GiO+EQgKGmQjMKwgSSwn/dtHWvEqqX36p0G/ +vyWH46+9I8TxL+w5vNQMdRxXmAJbye8vdyUFBv5AO2hOM2UlXLvz76VElSSVzgi/ +FNhalZ47cVDmFh1B8EsljS8WnNnFxYNNfmwxW6ds2N6nIctMsD+cKRiRdCxh/EbO +gn7TonhvlFkEv5M4W5HmAALY1Jn/e9r/Ciy+uX4avEKiMZOvuwEkU57WSwjiUfLL +onxlwWxf+xXQG0ELMezPP8t3QylPziol38P6MMgsRg3HAVTtcaSGsISa1oSBYOdm +iTiGb1BmLrJ3uA== +=0Wz6 +-END PGP SIGNATURE- Added: dev/spark
[spark] branch master updated: [SPARK-39255][SQL] Improve error messages
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 625afb4e1ae [SPARK-39255][SQL] Improve error messages 625afb4e1ae is described below commit 625afb4e1aefda59191d79b31f8c94941aedde1e Author: Max Gekk AuthorDate: Tue May 24 14:15:38 2022 +0300 [SPARK-39255][SQL] Improve error messages ### What changes were proposed in this pull request? In the PR, I propose to improve errors of the following error classes: 1. NON_PARTITION_COLUMN - `a non-partition column name` -> `the non-partition column` 2. UNSUPPORTED_SAVE_MODE - `a not existent path` -> `a non existent path`. 3. INVALID_FIELD_NAME. Quote ids to follow the rules https://github.com/apache/spark/pull/36621. 4. FAILED_SET_ORIGINAL_PERMISSION_BACK. It is renamed to FAILED_PERMISSION_RESET_ORIGINAL. 5. NON_LITERAL_PIVOT_VALUES - Wrap error's expression by double quotes. The PR adds new helper method `toSQLExpr()` for that. 6. CAST_INVALID_INPUT - Add the recommendation: `... Correct the syntax for the value before casting it, or change the type to one appropriate for the value.` ### Why are the changes needed? To improve user experience with Spark SQL by making error message more clear. ### Does this PR introduce _any_ user-facing change? Yes, it changes user-facing error messages. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite" $ build/sbt "sql/testOnly *QueryCompilationErrorsDSv2Suite" $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite" $ build/sbt "sql/testOnly *QueryExecutionAnsiErrorsSuite" $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite" $ build/sbt "sql/testOnly *QueryParsingErrorsSuite*" ``` Closes #36635 from MaxGekk/error-class-improve-msg-3. 
Lead-authored-by: Max Gekk Co-authored-by: Maxim Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 14 ++--- .../spark/sql/errors/QueryCompilationErrors.scala | 4 +- .../apache/spark/sql/errors/QueryErrorsBase.scala | 2 + .../spark/sql/errors/QueryExecutionErrors.scala| 2 +- .../apache/spark/sql/types/StructTypeSuite.scala | 22 --- .../resources/sql-tests/results/ansi/cast.sql.out | 68 +++--- .../resources/sql-tests/results/ansi/date.sql.out | 6 +- .../results/ansi/datetime-parsing-invalid.sql.out | 4 +- .../sql-tests/results/ansi/interval.sql.out| 20 +++ .../results/ansi/string-functions.sql.out | 8 +-- .../test/resources/sql-tests/results/pivot.sql.out | 2 +- .../sql-tests/results/postgreSQL/boolean.sql.out | 32 +- .../sql-tests/results/postgreSQL/float4.sql.out| 8 +-- .../sql-tests/results/postgreSQL/float8.sql.out| 8 +-- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out| 2 +- .../results/postgreSQL/window_part3.sql.out| 2 +- .../results/postgreSQL/window_part4.sql.out| 2 +- .../results/timestampNTZ/timestamp-ansi.sql.out| 2 +- .../sql-tests/results/udf/udf-pivot.sql.out| 2 +- .../errors/QueryCompilationErrorsDSv2Suite.scala | 4 +- .../sql/errors/QueryCompilationErrorsSuite.scala | 5 +- .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 3 +- .../sql/errors/QueryExecutionErrorsSuite.scala | 6 +- 24 files changed, 119 insertions(+), 113 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index eb328c6e20a..23f99524a7e 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -23,7 +23,7 @@ "message" : [ "Cannot up cast from to .\n" ] }, "CAST_INVALID_INPUT" : { -"message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." 
], +"message" : [ "The value of the type cannot be cast to because it is malformed. Correct the value as per the syntax, or change its target type. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], "sqlState" : "42000" }, "CAST_OVERFLOW" : { @@ -52,9 +52,6 @@ "message" : [ "Failed to rename to as destination already exists" ], "sqlState" : "22023" }, - "FAILED_SET_ORIGINAL_PERMISSION_BACK" : { -"message
[spark] branch branch-3.3 updated (459c4b0c94a -> d491e390ada)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 459c4b0c94a [SPARK-39144][SQL] Nested subquery expressions deduplicate relations should be done bottom up add a7259279d07 Preparing Spark release v3.3.0-rc3 new d491e390ada Preparing development version 3.3.1-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing development version 3.3.1-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit d491e390adaba04dc238868b7adc33251d880095 Author: Maxim Gekk AuthorDate: Tue May 24 10:15:35 2022 + Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87..0e449e841cf 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b..d12f2ad73fa 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e..842d63f5d38 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d75..f7d187bf952 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e7747..53f38df8851 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e..845f6659407 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370a..8e159089193 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3..1987c133285 100644
[spark] 01/01: Preparing Spark release v3.3.0-rc3
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to tag v3.3.0-rc3 in repository https://gitbox.apache.org/repos/asf/spark.git commit a7259279d07b302a51456adb13dc1e41a6fd06ed Author: Maxim Gekk AuthorDate: Tue May 24 10:15:29 2022 + Preparing Spark release v3.3.0-rc3 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf..9479bb3bf87 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fa..2e9c4d9960b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d38..2a9acfa335e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf952..7b17e625d75 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df8851..c5c920e7747 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407..697b5a3928e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e159089193..ad2db11370a 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c133285..1a7bdee70f3 100644 --- a/common/tags
[spark] tag v3.3.0-rc3 created (now a7259279d07)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to tag v3.3.0-rc3 in repository https://gitbox.apache.org/repos/asf/spark.git at a7259279d07 (commit) This tag includes the following new commits: new a7259279d07 Preparing Spark release v3.3.0-rc3 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38687][SQL] Use error classes in the compilation errors of generators
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 5d5e7f94315 [SPARK-38687][SQL] Use error classes in the compilation errors of generators 5d5e7f94315 is described below commit 5d5e7f94315c233d983139fa39163a838882be89 Author: panbingkun AuthorDate: Mon May 23 17:35:33 2022 +0300 [SPARK-38687][SQL] Use error classes in the compilation errors of generators ## What changes were proposed in this pull request? Migrate the following errors in QueryCompilationErrors onto use error classes: - nestedGeneratorError => UNSUPPORTED_GENERATOR.NESTED_IN_EXPRESSIONS - moreThanOneGeneratorError => UNSUPPORTED_GENERATOR.MULTI_GENERATOR - generatorOutsideSelectError => UNSUPPORTED_GENERATOR.OUTSIDE_SELECT - generatorNotExpectedError => UNSUPPORTED_GENERATOR.NOT_GENERATOR ### Why are the changes needed? Porting compilation errors of generator to new error framework, improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36617 from panbingkun/SPARK-38687. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 17 ++ .../spark/sql/errors/QueryCompilationErrors.scala | 22 .../apache/spark/sql/errors/QueryErrorsBase.scala | 8 ++- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 23 .../apache/spark/sql/GeneratorFunctionSuite.scala | 9 ++-- .../sql/errors/QueryCompilationErrorsSuite.scala | 61 ++ 6 files changed, 111 insertions(+), 29 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index f6fba105872..eb328c6e20a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -295,6 +295,23 @@ }, "sqlState" : "0A000" }, + "UNSUPPORTED_GENERATOR" : { +"message" : [ "The generator is not supported: " ], +"subClass" : { + "MULTI_GENERATOR" : { +"message" : [ "only one generator allowed per clause but found : " ] + }, + "NESTED_IN_EXPRESSIONS" : { +"message" : [ "nested in expressions " ] + }, + "NOT_GENERATOR" : { +"message" : [ " is expected to be a generator. However, its class is , which is not a generator." 
] + }, + "OUTSIDE_SELECT" : { +"message" : [ "outside the SELECT clause, found: " ] + } +} + }, "UNSUPPORTED_GROUPING_EXPRESSION" : { "message" : [ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup" ] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 3d133d6cfab..008f13961a6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -30,7 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeRef import org.apache.spark.sql.catalyst.plans.JoinType import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, Join, LogicalPlan, SerdeInfo, Window} import org.apache.spark.sql.catalyst.trees.{Origin, TreeNode} -import org.apache.spark.sql.catalyst.util.{toPrettySQL, FailFastMode, ParseMode, PermissiveMode} +import org.apache.spark.sql.catalyst.util.{FailFastMode, ParseMode, PermissiveMode} import org.apache.spark.sql.connector.catalog._ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits._ import org.apache.spark.sql.connector.catalog.functions.{BoundFunction, UnboundFunction} @@ -112,21 +112,19 @@ object QueryCompilationErrors extends QueryErrorsBase { } def nestedGeneratorError(trimmedNestedGenerator: Expression): Throwable = { -new AnalysisException( - "Generators are not supported when it's nested in " + -"expressions, but got: " + toPrettySQL(trimmedNestedGenerator)) +new AnalysisException(errorClass = "UNSUPPORTED_GENERATOR", + messageParameters = Array("NESTED_IN_EXPRESSIONS", toSQLExpr(trimmedNestedGenerator))) } def moreThanOneGeneratorError(generators: Seq[Expre
[spark] branch branch-3.3 updated: [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new fa400c666c4 [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages fa400c666c4 is described below commit fa400c666c41cf864103ba8705116a24092b3687 Author: Max Gekk AuthorDate: Sun May 22 18:58:25 2022 +0300 [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages ### What changes were proposed in this pull request? In the PR, I propose to describe the rules of quoting elements in error messages introduced by the PRs: - https://github.com/apache/spark/pull/36210 - https://github.com/apache/spark/pull/36233 - https://github.com/apache/spark/pull/36259 - https://github.com/apache/spark/pull/36324 - https://github.com/apache/spark/pull/36335 - https://github.com/apache/spark/pull/36359 - https://github.com/apache/spark/pull/36579 ### Why are the changes needed? To improve code maintenance, and the process of code review. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By existing GAs. Closes #36621 from MaxGekk/update-error-class-guide. 
Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 2a4d8a4ea709339175257027e31a75bdeed5daec) Signed-off-by: Max Gekk --- .../org/apache/spark/sql/errors/QueryErrorsBase.scala | 17 + 1 file changed, 17 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index 89bc1039e73..52ffa6d32fd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -23,6 +23,23 @@ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.types.{DataType, DoubleType, FloatType} +/** + * The trait exposes util methods for preparing error messages such as quoting of error elements. + * All classes that extend `QueryErrorsBase` shall follow the rules: + * 1. Any values shall be outputted in the SQL standard style by using `toSQLValue()`. + * For example: 'a string value', 1, NULL. + * 2. SQL types shall be double quoted and outputted in the upper case using `toSQLType()`. + * For example: "INT", "DECIMAL(10,0)". + * 3. Elements of identifiers shall be wrapped by backticks by using `toSQLId()`. + * For example: `namespaceA`.`funcB`, `tableC`. + * 4. SQL statements shall be in the upper case prepared by using `toSQLStmt`. + * For example: DESC PARTITION, DROP TEMPORARY FUNCTION. + * 5. SQL configs and datasource options shall be wrapped by double quotes by using + * `toSQLConf()`/`toDSOption()`. + * For example: "spark.sql.ansi.enabled". + * 6. Any values of datasource options or SQL configs shall be double quoted. + * For example: "true", "CORRECTED". 
+ */ trait QueryErrorsBase { // Converts an error class parameter to its SQL representation def toSQLValue(v: Any, t: DataType): String = Literal.create(v, t) match { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 2a4d8a4ea70 [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages 2a4d8a4ea70 is described below commit 2a4d8a4ea709339175257027e31a75bdeed5daec Author: Max Gekk AuthorDate: Sun May 22 18:58:25 2022 +0300 [SPARK-39243][SQL][DOCS] Rules of quoting elements in error messages ### What changes were proposed in this pull request? In the PR, I propose to describe the rules of quoting elements in error messages introduced by the PRs: - https://github.com/apache/spark/pull/36210 - https://github.com/apache/spark/pull/36233 - https://github.com/apache/spark/pull/36259 - https://github.com/apache/spark/pull/36324 - https://github.com/apache/spark/pull/36335 - https://github.com/apache/spark/pull/36359 - https://github.com/apache/spark/pull/36579 ### Why are the changes needed? To improve code maintenance, and the process of code review. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By existing GAs. Closes #36621 from MaxGekk/update-error-class-guide. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../org/apache/spark/sql/errors/QueryErrorsBase.scala | 17 + 1 file changed, 17 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index ab1f8c57480..81c4d0ac408 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -23,6 +23,23 @@ import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.util.quoteIdentifier import org.apache.spark.sql.types.{DataType, DoubleType, FloatType} +/** + * The trait exposes util methods for preparing error messages such as quoting of error elements. + * All classes that extend `QueryErrorsBase` shall follow the rules: + * 1. Any values shall be outputted in the SQL standard style by using `toSQLValue()`. + * For example: 'a string value', 1, NULL. + * 2. SQL types shall be double quoted and outputted in the upper case using `toSQLType()`. + * For example: "INT", "DECIMAL(10,0)". + * 3. Elements of identifiers shall be wrapped by backticks by using `toSQLId()`. + * For example: `namespaceA`.`funcB`, `tableC`. + * 4. SQL statements shall be in the upper case prepared by using `toSQLStmt`. + * For example: DESC PARTITION, DROP TEMPORARY FUNCTION. + * 5. SQL configs and datasource options shall be wrapped by double quotes by using + * `toSQLConf()`/`toDSOption()`. + * For example: "spark.sql.ansi.enabled". + * 6. Any values of datasource options or SQL configs shall be double quoted. + * For example: "true", "CORRECTED". 
+ */ trait QueryErrorsBase { // Converts an error class parameter to its SQL representation def toSQLValue(v: Any, t: DataType): String = Literal.create(v, t) match { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39167][SQL] Throw an exception w/ an error class for multiple rows from a subquery used as an expression
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 49562f41678 [SPARK-39167][SQL] Throw an exception w/ an error class for multiple rows from a subquery used as an expression 49562f41678 is described below commit 49562f416788cab05b3f82a2471a1f2f6561a1d8 Author: panbingkun AuthorDate: Sat May 21 07:50:59 2022 +0300 [SPARK-39167][SQL] Throw an exception w/ an error class for multiple rows from a subquery used as an expression ### What changes were proposed in this pull request? In the PR, I propose to use the MULTI_VALUE_SUBQUERY_ERROR error class for multiple rows from a subquery used as an expression. ### Why are the changes needed? Porting the execution errors for multiple rows from a subquery used as an expression to the new error framework should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added new test suite Closes #36580 from panbingkun/SPARK-39167. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 3 +++ .../spark/sql/errors/QueryExecutionErrors.scala| 5 .../org/apache/spark/sql/execution/subquery.scala | 5 ++-- .../scala/org/apache/spark/sql/SubquerySuite.scala | 11 - .../sql/errors/QueryExecutionErrorsSuite.scala | 27 ++ 5 files changed, 37 insertions(+), 14 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 1a139c018e8..f6fba105872 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -160,6 +160,9 @@ "MULTI_UDF_INTERFACE_ERROR" : { "message" : [ "Not allowed to implement multiple UDF interfaces, UDF class " ] }, + "MULTI_VALUE_SUBQUERY_ERROR" : { +"message" : [ "more than one row returned by a subquery used as an expression: " ] + }, "NON_LITERAL_PIVOT_VALUES" : { "message" : [ "Literal expressions required for pivot values, found ''" ], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 1e664100545..f79b30f0d0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2005,4 +2005,9 @@ object QueryExecutionErrors extends QueryErrorsBase { new SparkException(errorClass = "INVALID_BUCKET_FILE", messageParameters = Array(path), cause = null) } + + def multipleRowSubqueryError(plan: String): Throwable = { +new SparkException( + errorClass = "MULTI_VALUE_SUBQUERY_ERROR", messageParameters = Array(plan), cause = null) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala index 209b0f79243..c6f5983f243 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.catalyst.trees.{LeafLike, UnaryLike} import org.apache.spark.sql.catalyst.trees.TreePattern._ +import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{BooleanType, DataType} @@ -79,9 +80,7 @@ case class ScalarSubquery( def updateResult(): Unit = { val rows = plan.executeCollect() if (rows.length > 1) { - // TODO(SPARK-39167): Throw an exception w/ an error class for multiple rows from a subquery - throw new IllegalStateException( -s"more than one row returned by a subquery used as an expression:\n$plan") + throw QueryExecutionErrors.multipleRowSubqueryError(plan.toString) } if (rows.length == 1) { assert(rows(0).numFields == 1, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala index 396fca47634..500913fb289 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala @@ -19,7 +19,6 @@ package org.apache.spark.sql import sc
[spark] branch master updated: [SPARK-39213][SQL] Create ANY_VALUE aggregate function
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new efc1e8ac8bc [SPARK-39213][SQL] Create ANY_VALUE aggregate function efc1e8ac8bc is described below commit efc1e8ac8bc61872601ac2244629a9d54f8889fb Author: Vitalii Li AuthorDate: Fri May 20 22:28:18 2022 +0300 [SPARK-39213][SQL] Create ANY_VALUE aggregate function ### What changes were proposed in this pull request? Adding implementation for ANY_VALUE aggregate function. During optimization stage it is rewritten to `First` aggregate function. ### Why are the changes needed? This feature provides feature parity with popular DBs and DWHs ### Does this PR introduce _any_ user-facing change? Yes - introducing new aggregate function `ANY_VALUE`. Respective documentation is updated. ### How was this patch tested? Unit tests Closes #36584 from vli-databricks/SPARK-39213. 
Authored-by: Vitalii Li Signed-off-by: Max Gekk --- docs/sql-ref-ansi-compliance.md| 1 + .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 + .../spark/sql/catalyst/parser/SqlBaseParser.g4 | 3 + .../spark/sql/catalyst/analysis/Analyzer.scala | 1 + .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../catalyst/expressions/aggregate/AnyValue.scala | 64 +++ .../spark/sql/catalyst/parser/AstBuilder.scala | 10 +- .../spark/sql/catalyst/SQLKeywordSuite.scala | 2 +- .../expressions/aggregate/FirstLastTestSuite.scala | 4 + .../sql-functions/sql-expression-schema.md | 1 + .../resources/sql-tests/inputs/udf/udf-window.sql | 8 +- .../src/test/resources/sql-tests/inputs/window.sql | 29 +- .../sql-tests/results/udf/udf-window.sql.out | 46 +- .../resources/sql-tests/results/window.sql.out | 574 +++-- 14 files changed, 446 insertions(+), 299 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 257f53caef1..bb55cec52f5 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -346,6 +346,7 @@ Below is a list of all the keywords in Spark SQL. 
|AND|reserved|non-reserved|reserved| |ANTI|non-reserved|strict-non-reserved|non-reserved| |ANY|reserved|non-reserved|reserved| +|ANY_VALUE|non-reserved|non-reserved|non-reserved| |ARCHIVE|non-reserved|non-reserved|non-reserved| |ARRAY|non-reserved|non-reserved|reserved| |AS|reserved|non-reserved|reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index fac87c62de0..1cbd6d24dea 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -95,6 +95,7 @@ ANALYZE: 'ANALYZE'; AND: 'AND'; ANTI: 'ANTI'; ANY: 'ANY'; +ANY_VALUE: 'ANY_VALUE'; ARCHIVE: 'ARCHIVE'; ARRAY: 'ARRAY'; AS: 'AS'; diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index ed57e9062c1..ce37a09d5ba 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -824,6 +824,7 @@ primaryExpression | name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast | STRUCT LEFT_PAREN (argument+=namedExpression (COMMA argument+=namedExpression)*)? RIGHT_PAREN #struct | FIRST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #first +| ANY_VALUE LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #any_value | LAST LEFT_PAREN expression (IGNORE NULLS)? 
RIGHT_PAREN #last | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN#position | constant #constantDefault @@ -1072,6 +1073,7 @@ ansiNonReserved | ALTER | ANALYZE | ANTI +| ANY_VALUE | ARCHIVE | ARRAY | ASC @@ -1314,6 +1316,7 @@ nonReserved | ANALYZE | AND | ANY +| ANY_VALUE | ARCHIVE | ARRAY | AS diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4dd2081c67f..c5bee6f55fe 100644 --- a/sql/cata
[spark] branch master updated: [SPARK-39163][SQL] Throw an exception w/ error class for an invalid bucket file
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c6dccc7dd41 [SPARK-39163][SQL] Throw an exception w/ error class for an invalid bucket file c6dccc7dd41 is described below commit c6dccc7dd412a95007f5bb2584d69b85ff9ebf8e Author: panbingkun AuthorDate: Thu May 19 20:39:35 2022 +0300 [SPARK-39163][SQL] Throw an exception w/ error class for an invalid bucket file ### What changes were proposed in this pull request? In the PR, I propose to use the INVALID_BUCKET_FILE error class for an invalid bucket file. ### Why are the changes needed? Porting the execution errors for an invalid bucket file to the new error framework should improve user experience with Spark SQL. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #36603 from panbingkun/SPARK-39163. Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 3 +++ .../spark/sql/errors/QueryExecutionErrors.scala| 5 .../spark/sql/execution/DataSourceScanExec.scala | 4 ++-- .../sql/errors/QueryExecutionErrorsSuite.scala | 28 -- .../adaptive/AdaptiveQueryExecSuite.scala | 6 ++--- .../spark/sql/sources/BucketedReadSuite.scala | 23 -- 6 files changed, 38 insertions(+), 31 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e4ee09ea8a7..1a139c018e8 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -115,6 +115,9 @@ "INVALID_ARRAY_INDEX_IN_ELEMENT_AT" : { "message" : [ "The index is out of bounds. The array has elements. To return NULL instead, use `try_element_at`. If necessary set to \"false\" to bypass this error." 
] }, + "INVALID_BUCKET_FILE" : { +"message" : [ "Invalid bucket file: " ] + }, "INVALID_FIELD_NAME" : { "message" : [ "Field name is invalid: is not a struct." ], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index a155b0694b5..1e664100545 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2000,4 +2000,9 @@ object QueryExecutionErrors extends QueryErrorsBase { s"add ${toSQLValue(amount, IntegerType)} $unit to " + s"${toSQLValue(DateTimeUtils.microsToInstant(micros), TimestampType)}")) } + + def invalidBucketFile(path: String): Throwable = { +new SparkException(errorClass = "INVALID_BUCKET_FILE", messageParameters = Array(path), + cause = null) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index f7b627cef08..f5d349d975f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partitioning, UnknownPartitioning} import org.apache.spark.sql.catalyst.util.{truncatedString, CaseInsensitiveMap} +import org.apache.spark.sql.errors.QueryExecutionErrors import org.apache.spark.sql.execution.datasources._ import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat => ParquetSource} import org.apache.spark.sql.execution.datasources.v2.PushedDownOperators @@ -618,8 +619,7 @@ case class FileSourceScanExec( }.groupBy { f => 
BucketingUtils .getBucketId(new Path(f.filePath).getName) - // TODO(SPARK-39163): Throw an exception w/ error class for an invalid bucket file - .getOrElse(throw new IllegalStateException(s"Invalid bucket file ${f.filePath}")) + .getOrElse(throw QueryExecutionErrors.invalidBucketFile(f.filePath)) } val prunedFilesGroupedToBuckets = if (optionalBucketSet.isDefined) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala inde
[spark] branch master updated: [SPARK-39234][SQL] Code clean up in SparkThrowableHelper.getMessage
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new ebd916b0054 [SPARK-39234][SQL] Code clean up in SparkThrowableHelper.getMessage ebd916b0054 is described below commit ebd916b005499c724bbec54b3df85cd28a864e03 Author: Gengliang Wang AuthorDate: Thu May 19 19:13:15 2022 +0300 [SPARK-39234][SQL] Code clean up in SparkThrowableHelper.getMessage ### What changes were proposed in this pull request? 1. Remove the starting "\n" in `Origin.context`. The "\n" will be append in the method `SparkThrowableHelper.getMessage` instead. 2. Code clean up the method SparkThrowableHelper.getMessage to eliminate redundant code. ### Why are the changes needed? Code clean up to eliminate redundant code. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #36612 from gengliangwang/moveNewLine. 
Authored-by: Gengliang Wang Signed-off-by: Max Gekk --- .../src/main/scala/org/apache/spark/ErrorInfo.scala | 21 + .../apache/spark/sql/catalyst/trees/TreeNode.scala | 2 +- .../spark/sql/catalyst/trees/TreeNodeSuite.scala| 3 +-- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ErrorInfo.scala b/core/src/main/scala/org/apache/spark/ErrorInfo.scala index e11e6485851..4639e56aa50 100644 --- a/core/src/main/scala/org/apache/spark/ErrorInfo.scala +++ b/core/src/main/scala/org/apache/spark/ErrorInfo.scala @@ -77,20 +77,25 @@ private[spark] object SparkThrowableHelper { queryContext: String = ""): String = { val errorInfo = errorClassToInfoMap.getOrElse(errorClass, throw new IllegalArgumentException(s"Cannot find error class '$errorClass'")) -if (errorInfo.subClass.isDefined) { +val (displayClass, displayMessageParameters, displayFormat) = if (errorInfo.subClass.isEmpty) { + (errorClass, messageParameters, errorInfo.messageFormat) +} else { val subClass = errorInfo.subClass.get val subErrorClass = messageParameters.head val errorSubInfo = subClass.getOrElse(subErrorClass, throw new IllegalArgumentException(s"Cannot find sub error class '$subErrorClass'")) - val subMessageParameters = messageParameters.tail - "[" + errorClass + "." + subErrorClass + "] " + String.format((errorInfo.messageFormat + -errorSubInfo.messageFormat).replaceAll("<[a-zA-Z0-9_-]+>", "%s"), -subMessageParameters: _*) + queryContext + (errorClass + "." 
+ subErrorClass, messageParameters.tail, +errorInfo.messageFormat + errorSubInfo.messageFormat) +} +val displayMessage = String.format( + displayFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), + displayMessageParameters : _*) +val displayQueryContext = if (queryContext.isEmpty) { + "" } else { - "[" + errorClass + "] " + String.format( -errorInfo.messageFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), -messageParameters: _*) + queryContext + s"\n$queryContext" } +s"[$displayClass] $displayMessage$displayQueryContext" } def getSqlState(errorClass: String): String = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 0714898e19d..54c64515ee4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -89,7 +89,7 @@ case class Origin( "" } val builder = new StringBuilder - builder ++= s"\n== SQL$objectContext$positionContext ==\n" + builder ++= s"== SQL$objectContext$positionContext ==\n" val text = sqlText.get val start = math.max(startIndex.get, 0) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index ffbc5d89bdb..899a740bdae 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -876,8 +876,7 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper { objectType = Some("VIEW"), objectName = Some("some_view&
[spark] branch master updated: [SPARK-37939][SQL] Use error classes in the parsing errors of properties
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 7309e76d8b9 [SPARK-37939][SQL] Use error classes in the parsing errors of properties 7309e76d8b9 is described below commit 7309e76d8b95e306d6f3d2f611316b748949e9cf Author: panbingkun AuthorDate: Thu May 19 11:29:37 2022 +0300 [SPARK-37939][SQL] Use error classes in the parsing errors of properties ## What changes were proposed in this pull request? Migrate the following errors in QueryParsingErrors onto use error classes: - cannotCleanReservedNamespacePropertyError => UNSUPPORTED_FEATURE.SET_NAMESPACE_PROPERTY - cannotCleanReservedTablePropertyError => UNSUPPORTED_FEATURE.SET_TABLE_PROPERTY - invalidPropertyKeyForSetQuotedConfigurationError => INVALID_PROPERTY_KEY - invalidPropertyValueForSetQuotedConfigurationError => INVALID_PROPERTY_VALUE - propertiesAndDbPropertiesBothSpecifiedError => UNSUPPORTED_FEATURE.SET_PROPERTIES_AND_DBPROPERTIES ### Why are the changes needed? Porting parsing errors of partitions to new error framework, improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryParsingErrorsSuite*" ``` Closes #36561 from panbingkun/SPARK-37939. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 15 .../spark/sql/errors/QueryParsingErrors.scala | 28 +-- .../spark/sql/errors/QueryParsingErrorsSuite.scala | 88 ++ .../spark/sql/execution/SparkSqlParserSuite.scala | 6 +- .../command/CreateNamespaceParserSuite.scala | 3 +- 5 files changed, 129 insertions(+), 11 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 21fde82adbb..e4ee09ea8a7 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -133,6 +133,12 @@ "message" : [ "The value of parameter(s) '' in is invalid: " ], "sqlState" : "22023" }, + "INVALID_PROPERTY_KEY" : { +"message" : [ " is an invalid property key, please use quotes, e.g. SET =" ] + }, + "INVALID_PROPERTY_VALUE" : { +"message" : [ " is an invalid property value, please use quotes, e.g. SET =" ] + }, "INVALID_SQL_SYNTAX" : { "message" : [ "Invalid SQL syntax: " ], "sqlState" : "42000" @@ -262,6 +268,15 @@ "REPEATED_PIVOT" : { "message" : [ "Repeated PIVOT operation." ] }, + "SET_NAMESPACE_PROPERTY" : { +"message" : [ " is a reserved namespace property, ." ] + }, + "SET_PROPERTIES_AND_DBPROPERTIES" : { +"message" : [ "set PROPERTIES and DBPROPERTIES at the same time." ] + }, + "SET_TABLE_PROPERTY" : { +"message" : [ " is a reserved table property, ." ] + }, "TOO_MANY_TYPE_ARGUMENTS_FOR_UDF_CLASS" : { "message" : [ "UDF class with type arguments." 
] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index debfe1b0891..8fa28c0d347 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -267,16 +267,26 @@ object QueryParsingErrors extends QueryErrorsBase { def cannotCleanReservedNamespacePropertyError( property: String, ctx: ParserRuleContext, msg: String): Throwable = { -new ParseException(s"$property is a reserved namespace property, $msg.", ctx) +new ParseException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array("SET_NAMESPACE_PROPERTY", property, msg), + ctx) } def propertiesAndDbPropertiesBothSpecifiedError(ctx: CreateNamespaceContext): Throwable = { -new ParseException("Either PROPERTIES or DBPROPERTIES is allowed.", ctx) +new ParseException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array("SET_PROPERTIES_AND_DBPROPERTIES"), + ctx +) } def cannotCleanReservedTablePropertyError( property: String, ctx:
[spark] branch master updated: [SPARK-39229][SQL] Separate query contexts from error-classes.json
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 3c74aed2cbd [SPARK-39229][SQL] Separate query contexts from error-classes.json 3c74aed2cbd is described below commit 3c74aed2cbde2968fab93b2799a56d075420e7d3 Author: Gengliang Wang AuthorDate: Thu May 19 11:00:16 2022 +0300 [SPARK-39229][SQL] Separate query contexts from error-classes.json ### What changes were proposed in this pull request? Separate query contexts for runtime errors from error-classes.json. ### Why are the changes needed? The message is JSON should only contain parameters explicitly thrown. It is more elegant to separate query contexts from error-classes.json. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #36604 from gengliangwang/refactorErrorClass. Authored-by: Gengliang Wang Signed-off-by: Max Gekk --- .../apache/spark/memory/SparkOutOfMemoryError.java | 2 +- core/src/main/resources/error/error-classes.json | 10 +++ .../main/scala/org/apache/spark/ErrorInfo.scala| 9 -- .../scala/org/apache/spark/SparkException.scala| 34 +++--- .../org/apache/spark/SparkThrowableSuite.scala | 2 +- .../spark/sql/errors/QueryExecutionErrors.scala| 29 +- 6 files changed, 53 insertions(+), 33 deletions(-) diff --git a/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java b/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java index 22dfe4d4dbe..c5f19a0c201 100644 --- a/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java +++ b/core/src/main/java/org/apache/spark/memory/SparkOutOfMemoryError.java @@ -39,7 +39,7 @@ public final class SparkOutOfMemoryError extends OutOfMemoryError implements Spa } public SparkOutOfMemoryError(String errorClass, String[] messageParameters) { -super(SparkThrowableHelper.getMessage(errorClass, 
messageParameters)); +super(SparkThrowableHelper.getMessage(errorClass, messageParameters, "")); this.errorClass = errorClass; this.messageParameters = messageParameters; } diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index f4eadd4a368..21fde82adbb 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -4,7 +4,7 @@ "sqlState" : "42000" }, "ARITHMETIC_OVERFLOW" : { -"message" : [ ". If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], +"message" : [ ". If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], "sqlState" : "22003" }, "CANNOT_CAST_DATATYPE" : { @@ -12,7 +12,7 @@ "sqlState" : "22005" }, "CANNOT_CHANGE_DECIMAL_PRECISION" : { -"message" : [ " cannot be represented as Decimal(, ). If necessary set to \"false\" to bypass this error." ], +"message" : [ " cannot be represented as Decimal(, ). If necessary set to \"false\" to bypass this error." ], "sqlState" : "22005" }, "CANNOT_PARSE_DECIMAL" : { @@ -23,7 +23,7 @@ "message" : [ "Cannot up cast from to .\n" ] }, "CAST_INVALID_INPUT" : { -"message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], +"message" : [ "The value of the type cannot be cast to because it is malformed. To return NULL instead, use `try_cast`. If necessary set to \"false\" to bypass this error." ], "sqlState" : "42000" }, "CAST_OVERFLOW" : { @@ -38,7 +38,7 @@ "sqlState" : "22008" }, "DIVIDE_BY_ZERO" : { -"message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to \"false\" (except for ANSI interval type) to bypass this error." ], +"message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to \"false\" (except for ANSI interval type) to bypass this error." 
], "sqlState" : "22012" }, "DUPLICATE_KEY" : { @@ -138,7 +138,7 @@ "sqlState" : "42000" }, &
[spark] branch branch-3.3 updated (b5ce32f41f9 -> 47c47b6e864)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from b5ce32f41f9 [SPARK-39162][SQL][3.3] Jdbc dialect should decide which function could be pushed down add 47c47b6e864 [SPARK-39214][SQL][3.3] Improve errors related to CAST No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 12 +-- .../spark/sql/catalyst/expressions/Cast.scala | 8 +- .../spark/sql/catalyst/util/DateTimeUtils.scala| 8 +- .../spark/sql/catalyst/util/UTF8StringUtils.scala | 2 +- .../spark/sql/errors/QueryExecutionErrors.scala| 62 .../scala/org/apache/spark/sql/types/Decimal.scala | 2 +- .../catalyst/expressions/AnsiCastSuiteBase.scala | 104 + .../spark/sql/catalyst/expressions/CastSuite.scala | 70 +++--- .../sql/catalyst/util/DateFormatterSuite.scala | 2 +- .../catalyst/util/TimestampFormatterSuite.scala| 3 +- .../org/apache/spark/sql/types/DecimalSuite.scala | 4 +- .../resources/sql-tests/results/ansi/cast.sql.out | 82 .../resources/sql-tests/results/ansi/date.sql.out | 8 +- .../results/ansi/datetime-parsing-invalid.sql.out | 8 +- .../sql-tests/results/ansi/interval.sql.out| 28 +++--- .../results/ansi/string-functions.sql.out | 8 +- .../sql-tests/results/postgreSQL/boolean.sql.out | 62 ++-- .../sql-tests/results/postgreSQL/float4.sql.out| 14 +-- .../sql-tests/results/postgreSQL/float8.sql.out| 10 +- .../sql-tests/results/postgreSQL/int8.sql.out | 8 +- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out| 2 +- .../results/postgreSQL/window_part3.sql.out| 2 +- .../results/postgreSQL/window_part4.sql.out| 2 +- .../results/timestampNTZ/timestamp-ansi.sql.out| 4 +- .../org/apache/spark/sql/SQLInsertTestSuite.scala | 3 +- .../org/apache/spark/sql/sources/InsertSuite.scala | 12 ++- 27 files changed, 294 insertions(+), 240 deletions(-) - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (30cf796bdb0 -> 96f4b7dbc1f)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 30cf796bdb0 [BUILD] When building spark project, remove spark-tags-tests.jar from… add 96f4b7dbc1f [SPARK-39212][SQL] Use double quotes for values of SQL configs/DS options in error messages No new revisions were added by this update. Summary of changes: core/src/main/resources/error/error-classes.json | 30 +- .../org/apache/spark/SparkThrowableSuite.scala | 2 +- .../apache/spark/sql/errors/QueryErrorsBase.scala | 4 ++ .../spark/sql/errors/QueryExecutionErrors.scala| 12 ++-- .../resources/sql-tests/results/ansi/array.sql.out | 24 .../resources/sql-tests/results/ansi/cast.sql.out | 70 +++--- .../resources/sql-tests/results/ansi/date.sql.out | 12 ++-- .../results/ansi/datetime-parsing-invalid.sql.out | 20 +++ .../ansi/decimalArithmeticOperations.sql.out | 8 +-- .../sql-tests/results/ansi/interval.sql.out| 40 ++--- .../resources/sql-tests/results/ansi/map.sql.out | 8 +-- .../results/ansi/string-functions.sql.out | 8 +-- .../sql-tests/results/ansi/timestamp.sql.out | 14 ++--- .../test/resources/sql-tests/results/date.sql.out | 6 +- .../results/datetime-formatting-invalid.sql.out| 44 +++--- .../results/datetime-parsing-invalid.sql.out | 16 ++--- .../resources/sql-tests/results/interval.sql.out | 18 +++--- .../sql-tests/results/json-functions.sql.out | 4 +- .../sql-tests/results/postgreSQL/boolean.sql.out | 32 +- .../sql-tests/results/postgreSQL/float4.sql.out| 14 ++--- .../sql-tests/results/postgreSQL/float8.sql.out| 10 ++-- .../sql-tests/results/postgreSQL/int4.sql.out | 12 ++-- .../sql-tests/results/postgreSQL/int8.sql.out | 22 +++ .../results/postgreSQL/select_having.sql.out | 2 +- .../sql-tests/results/postgreSQL/text.sql.out | 4 +- .../results/postgreSQL/window_part2.sql.out| 6 +- .../results/postgreSQL/window_part3.sql.out| 2 +- .../results/postgreSQL/window_part4.sql.out| 2 +- 
.../resources/sql-tests/results/timestamp.sql.out | 12 ++-- .../results/timestampNTZ/timestamp-ansi.sql.out| 6 +- .../results/timestampNTZ/timestamp.sql.out | 2 +- .../native/stringCastAndExpressions.sql.out| 6 +- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- .../sql/errors/QueryCompilationErrorsSuite.scala | 2 +- .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 18 +++--- .../sql/errors/QueryExecutionErrorsSuite.scala | 12 ++-- 36 files changed, 257 insertions(+), 249 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54558 - in /dev/spark/v3.3.0-rc2-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.0/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: maxgekk Date: Mon May 16 09:33:34 2022 New Revision: 54558 Log: Apache Spark v3.3.0-rc2 docs [This commit notification would consist of 2650 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54555 - /dev/spark/v3.3.0-rc2-bin/
Author: maxgekk Date: Mon May 16 08:58:23 2022 New Revision: 54555 Log: Apache Spark v3.3.0-rc2 Added: dev/spark/v3.3.0-rc2-bin/ dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc2-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc2-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc2-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc2-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.asc Mon May 16 08:58:23 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKCEcwTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q0/ZYEACat+N6zgwj76NfgHJJcbtBc5mhIw7y +G1UOmOTZAkKp1Q0J9pXCmhRdC5jhJnIDCXhFEvLAUDTS8HpmnTyFAs2kCSxZazn7 +AIkqWoX3VYAYa3OKcvnKjosOwuI5FqI9RDmKCi9Al53eSfY5W7D/sgAdKtfRFPom +F3F/piqTr1z0OdaWvcsNh4VCO/gFNw8SrA9npNxtMoRsRgEe6PaOEruHGDjzVBBD +nYxC+9NbIH24y+hfVR1aP4o7uv3n+th7s+kHnfNXcORz1bf6udFB2iNlgavQRbsl +mehJcLcYCcEaWa3QVChK6fFUyiowFtswKqJEj/vP4SOf7uRICEOI5eA8SrNCG1FT +4ftApO/yqQeeRCMfZziflWEQJN0ZQSkAs0MAADkxeOTaQqYwXIBAT9Vl90Kjj2mE +sux18CTfj505k3DzN4T60DSA4bcUpaaRWgH+CatRskwomHzfVvB3EsM+Os2Kcl4O +WXrv90VCyIDIZWqb6UKkIEqqvTsx5TVec7jJk63vp0TX+toszfap49Gim3HDpUIw +v9q8EiYxJD1MPgM5WLW8RNX9gD7sDh46DxflZFovtyWq3j96k++Nku3ehE3tq/Md +pPo+9qBgOkZFtyV4U6FXrnwygoH36YoFHgBcRSLr2J6v3mcHOBSsYuHQkGhEIk7B +8YdA3Sa7fiDvTA== +=t6GN +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc2-bin/SparkR_3.3.0.tar.gz.sha512 Mon May 16 08:58:23 2022 @@ -0,0 +1,3 @@ +SparkR_3.3.0.tar.gz: EFB28305 D8B97FB0 50F0EF30 680C259D AA5039E1 99F1EC09 + 08FAC78A B5E41D91 C8621784 CEB78BC2 BCDCAE25 915BAB0C + B8E1AF82 05007C36 997F48F0 FD933E4C Added: dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc2-bin/pyspark-3.3.0.tar.gz.asc Mon May 16 08:58:23 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmKCEc4THG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q0336D/9FbAXNRXFgl/I9YoHFS5Ci4fvZJN9F +TR2UGv6zX6T5oEEZfAwYHOvf/4hp7Ob4Oy8Yyor5DjTJGizkpGCa9hLJ+PbOpKBK +HuVeLjG/gS79euEeMYkHmyDWEnvwB96dH1FfbM/H/9bejBwNBaYxsW0G3TqJRSmC +oyka0xgAK4e2CDPB9Ks/j59qn0NobyhtLeJCdgXDW/TX/yPWs0NO4zKpmWXiozct +3Yb6OTa5TOPUNjehpYQxh5yOgzLRsNgNQYindil48nQO9cK0t0L6v7Rhs8YN3LAC +oqdWkU97eQLm0e/L7QuThH6oSUKZg65PfaRN7Z2P8isoo+pZxXfmnSkn81VmiFUz +y2e9Goe03k15IOEi3PWmh/ypBotgNAz4eKGHUFrbWb4VszH5uaf8HapvYihfMw/0 +HFPFqtuvxDmq6ySppAAfZ+cEnQ13+2OaTZkS9m0LxQOWtOTkZQMedeoAIxNnglXR +gir73fiN4KQ/QmaM/TLiFGjEgtRwFKjaCvrL6H9Ocb0/ijsi2paVn+AlGdsSoc39 +7ujWAi2STLe5By9+GObliZhkWzxoiQPY06xGuzLhXrSaa1PLL/oeqaB15+hh02jE +gmyIqgHyqwDPHHL3kis4qQ4ylpMSCVTI4OcScVOOzg3/YD69rQV45SJv2+/9RyJt +mmh9rbTmwSSEPQ
[spark] branch branch-3.3 updated: [SPARK-39187][SQL][3.3] Remove `SparkIllegalStateException`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 1853eb117e2 [SPARK-39187][SQL][3.3] Remove `SparkIllegalStateException` 1853eb117e2 is described below commit 1853eb117e24bcc0509d275c4caca6c033bf0ab9 Author: Max Gekk AuthorDate: Mon May 16 11:39:37 2022 +0300 [SPARK-39187][SQL][3.3] Remove `SparkIllegalStateException` ### What changes were proposed in this pull request? Remove `SparkIllegalStateException` and replace it by `IllegalStateException` where it was used. This is a backport of https://github.com/apache/spark/pull/36550. ### Why are the changes needed? To improve code maintenance and be consistent to other places where `IllegalStateException` is used in illegal states (for instance, see https://github.com/apache/spark/pull/36524). After the PR https://github.com/apache/spark/pull/36500, the exception is substituted by `SparkException` w/ the `INTERNAL_ERROR` error class. ### Does this PR introduce _any_ user-facing change? No. Users shouldn't face to the exception in regular cases. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "sql/test:testOnly *QueryExecutionErrorsSuite*" $ build/sbt "test:testOnly *ArrowUtilsSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 1a90512f605c490255f7b38215c207e64621475b) Signed-off-by: Max Gekk Closes #36558 from MaxGekk/remove-SparkIllegalStateException-3.3. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/scala/org/apache/spark/SparkException.scala | 12 .../apache/spark/sql/catalyst/analysis/CheckAnalysis.scala | 6 +++--- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 11 +++ .../main/scala/org/apache/spark/sql/util/ArrowUtils.scala | 9 +++-- .../scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala | 4 ++-- .../spark/sql/errors/QueryExecutionErrorsSuite.scala | 14 -- 6 files changed, 11 insertions(+), 45 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index 8442c8eb8d3..ed6e811a4cc 100644 --- a/core/src/main/scala/org/apache/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -158,18 +158,6 @@ private[spark] class SparkFileAlreadyExistsException( override def getErrorClass: String = errorClass } -/** - * Illegal state exception thrown from Spark with an error class. - */ -private[spark] class SparkIllegalStateException( -errorClass: String, -messageParameters: Array[String]) - extends IllegalStateException( -SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { - - override def getErrorClass: String = errorClass -} - /** * File not found exception thrown from Spark with an error class. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index ff40272682e..f89fbe59af6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils, TypeUtils} import org.apache.spark.sql.connector.catalog.{LookupCatalog, SupportsPartitionManagement} -import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -571,8 +571,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { |in operator ${operator.simpleString(SQLConf.get.maxToStringFields)} """.stripMargin) - case _: UnresolvedHint => -throw QueryExecutionErrors.logicalHintOperatorNotRemovedDuringAnalysisError + case _: UnresolvedHint => throw new IllegalStateException( +"Logical hint operator should be removed during analysis.") case f @ Filter(condition, _) if PlanHelper.specialExpressionsInUnsupportedOperator(f).nonEmpty => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/error
[spark] branch branch-3.3 updated (386c75693b5 -> af38fce62da)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 386c75693b5 [SPARK-39186][PYTHON] Make pandas-on-Spark's skew consistent with pandas add c8c657b922a Preparing Spark release v3.3.0-rc2 new af38fce62da Preparing development version 3.3.1-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org
[spark] 01/01: Preparing development version 3.3.1-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit af38fce62da393ff0b56662be050b46de115a89f Author: Maxim Gekk AuthorDate: Mon May 16 05:42:35 2022 + Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87..0e449e841cf 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b..d12f2ad73fa 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e..842d63f5d38 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d75..f7d187bf952 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e7747..53f38df8851 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e..845f6659407 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370a..8e159089193 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3..1987c133285 100644
[spark] tag v3.3.0-rc2 created (now c8c657b922a)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to tag v3.3.0-rc2 in repository https://gitbox.apache.org/repos/asf/spark.git at c8c657b922a (commit) This tag includes the following new commits: new c8c657b922a Preparing Spark release v3.3.0-rc2 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org
[spark] 01/01: Preparing Spark release v3.3.0-rc2
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to tag v3.3.0-rc2 in repository https://gitbox.apache.org/repos/asf/spark.git commit c8c657b922ac8fd8dcf9553113e11a80079db059 Author: Maxim Gekk AuthorDate: Mon May 16 05:42:28 2022 + Preparing Spark release v3.3.0-rc2 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 0e449e841cf..9479bb3bf87 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.1 +Version: 3.3.0 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index d12f2ad73fa..2e9c4d9960b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 842d63f5d38..2a9acfa335e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f7d187bf952..7b17e625d75 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 53f38df8851..c5c920e7747 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 845f6659407..697b5a3928e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 8e159089193..ad2db11370a 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.1-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1987c133285..1a7bdee70f3 100644 --- a/common/tags
[spark] branch master updated: [SPARK-38688][SQL][TESTS] Use error classes in the compilation errors of deserializer
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 66c6e19aad1 [SPARK-38688][SQL][TESTS] Use error classes in the compilation errors of deserializer 66c6e19aad1 is described below commit 66c6e19aad1e42d404b70b7dcddf871f28c3774f Author: panbingkun AuthorDate: Mon May 16 08:31:16 2022 +0300 [SPARK-38688][SQL][TESTS] Use error classes in the compilation errors of deserializer ### What changes were proposed in this pull request? Migrate the following errors in QueryCompilationErrors: * dataTypeMismatchForDeserializerError -> UNSUPPORTED_DESERIALIZER.DATA_TYPE_MISMATCH * fieldNumberMismatchForDeserializerError -> UNSUPPORTED_DESERIALIZER.FIELD_NUMBER_MISMATCH ### Why are the changes needed? Porting compilation errors of unsupported deserializer to new error framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new UT. Closes #36479 from panbingkun/SPARK-38688. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 ++ .../spark/sql/errors/QueryCompilationErrors.scala | 9 +++-- .../apache/spark/sql/errors/QueryErrorsBase.scala | 4 +++ .../catalyst/encoders/EncoderResolutionSuite.scala | 26 -- .../scala/org/apache/spark/sql/DatasetSuite.scala | 18 -- .../sql/errors/QueryCompilationErrorsSuite.scala | 40 +- 6 files changed, 76 insertions(+), 32 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 3a7bc757f73..f401ea8d29a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -200,6 +200,17 @@ "message" : [ "Unsupported data type " ], "sqlState" : "0A000" }, + "UNSUPPORTED_DESERIALIZER" : { +"message" : [ "The deserializer is not supported: " ], +"subClass" : { + "DATA_TYPE_MISMATCH" : { +"message" : [ "need field but got ." ] + }, + "FIELD_NUMBER_MISMATCH" : { +"message" : [ "try to map to Tuple, but failed as the number of fields does not line up." 
] + } +} + }, "UNSUPPORTED_FEATURE" : { "message" : [ "The feature is not supported: " ], "subClass" : { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index efb4389ec50..d803cd23df6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -147,14 +147,17 @@ object QueryCompilationErrors extends QueryErrorsBase { dataType: DataType, desiredType: String): Throwable = { val quantifier = if (desiredType.equals("array")) "an" else "a" new AnalysisException( - s"need $quantifier $desiredType field but got " + dataType.catalogString) + errorClass = "UNSUPPORTED_DESERIALIZER", + messageParameters = +Array("DATA_TYPE_MISMATCH", quantifier, toSQLType(desiredType), toSQLType(dataType))) } def fieldNumberMismatchForDeserializerError( schema: StructType, maxOrdinal: Int): Throwable = { new AnalysisException( - s"Try to map ${schema.catalogString} to Tuple${maxOrdinal + 1}, " + -"but failed as the number of fields does not line up.") + errorClass = "UNSUPPORTED_DESERIALIZER", + messageParameters = +Array("FIELD_NUMBER_MISMATCH", toSQLType(schema), (maxOrdinal + 1).toString)) } def upCastFailureError( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala index d51ee13acef..b47b9f12fb1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala @@ -60,6 +60,10 @@ trait QueryErrorsBase { quoteByDefault(t.sql) } + def toSQLType(text: String): String = { +quoteByDefault(text.toUpperCase(Locale.ROOT)) + } + def toSQLConf(conf: String): String = { quoteByDefault(conf) } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/sr
[spark] branch master updated: [SPARK-39187][SQL] Remove `SparkIllegalStateException`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 1a90512f605 [SPARK-39187][SQL] Remove `SparkIllegalStateException` 1a90512f605 is described below commit 1a90512f605c490255f7b38215c207e64621475b Author: Max Gekk AuthorDate: Mon May 16 08:24:12 2022 +0300 [SPARK-39187][SQL] Remove `SparkIllegalStateException` ### What changes were proposed in this pull request? Remove `SparkIllegalStateException` and replace it by `IllegalStateException` where it was used. ### Why are the changes needed? To improve code maintenance and be consistent to other places where `IllegalStateException` is used in illegal states (for instance, see https://github.com/apache/spark/pull/36524). After the PR https://github.com/apache/spark/pull/36500, the exception is substituted by `SparkException` w/ the `INTERNAL_ERROR` error class. ### Does this PR introduce _any_ user-facing change? No. Users shouldn't face to the exception in regular cases. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "sql/test:testOnly *QueryExecutionErrorsSuite*" $ build/sbt "test:testOnly *ArrowUtilsSuite" ``` Closes #36550 from MaxGekk/remove-SparkIllegalStateException. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../main/scala/org/apache/spark/SparkException.scala | 12 .../spark/sql/catalyst/analysis/CheckAnalysis.scala| 6 +++--- .../apache/spark/sql/errors/QueryExecutionErrors.scala | 16 +++- .../scala/org/apache/spark/sql/util/ArrowUtils.scala | 9 +++-- .../org/apache/spark/sql/util/ArrowUtilsSuite.scala| 2 +- .../spark/sql/errors/QueryExecutionErrorsSuite.scala | 18 -- 6 files changed, 14 insertions(+), 49 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index a846e6c46a2..4feea6151b9 100644 --- a/core/src/main/scala/org/apache/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -151,18 +151,6 @@ private[spark] class SparkFileAlreadyExistsException( override def getErrorClass: String = errorClass } -/** - * Illegal state exception thrown from Spark with an error class. - */ -private[spark] class SparkIllegalStateException( -errorClass: String, -messageParameters: Array[String]) - extends IllegalStateException( -SparkThrowableHelper.getMessage(errorClass, messageParameters)) with SparkThrowable { - - override def getErrorClass: String = errorClass -} - /** * File not found exception thrown from Spark with an error class. 
*/ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 1e9c431292b..f827e9effe9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreeNodeTag import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, StringUtils, TypeUtils} import org.apache.spark.sql.connector.catalog.{LookupCatalog, SupportsPartitionManagement} -import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.sql.util.SchemaUtils @@ -582,8 +582,8 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { |in operator ${operator.simpleString(SQLConf.get.maxToStringFields)} """.stripMargin) - case _: UnresolvedHint => -throw QueryExecutionErrors.logicalHintOperatorNotRemovedDuringAnalysisError + case _: UnresolvedHint => throw new IllegalStateException( +"Logical hint operator should be removed during analysis.") case f @ Filter(condition, _) if PlanHelper.specialExpressionsInUnsupportedOperator(f).nonEmpty => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 7ed4fc3574d..b7239d3ff60 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -34,7 +34,7 @@
[spark] branch master updated: [SPARK-38739][SQL][TESTS] Test the error class: INVALID_SYNTAX_FOR_CAST
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 7221ea31b6b [SPARK-38739][SQL][TESTS] Test the error class: INVALID_SYNTAX_FOR_CAST 7221ea31b6b is described below commit 7221ea31b6bbad0d87b22e5413b8979bee56321c Author: panbingkun AuthorDate: Fri May 13 23:20:42 2022 +0300 [SPARK-38739][SQL][TESTS] Test the error class: INVALID_SYNTAX_FOR_CAST ## What changes were proposed in this pull request? This PR aims to add a test for the error class INVALID_SYNTAX_FOR_CAST to `QueryExecutionErrors`. Also the method `invalidInputSyntaxForNumericError` is removed as no longer used. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? By running new test: ``` $ build/sbt "test:testOnly *QueryExecutionAnsiErrorsSuite" ``` Closes #36493 from panbingkun/SPARK-38739. Authored-by: panbingkun Signed-off-by: Max Gekk --- .../apache/spark/sql/errors/QueryExecutionErrors.scala | 9 + .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 17 - 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 447a820a128..e687417d7cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -115,17 +115,10 @@ object QueryExecutionErrors extends QueryErrorsBase { context)) } - def invalidInputSyntaxForNumericError( - e: NumberFormatException, - errorContext: String): NumberFormatException = { -new NumberFormatException(s"${e.getMessage}. 
To return NULL instead, use 'try_cast'. " + - s"If necessary set ${SQLConf.ANSI_ENABLED.key} to false to bypass this error." + errorContext) - } - def invalidInputSyntaxForNumericError( to: DataType, s: UTF8String, - errorContext: String): NumberFormatException = { + errorContext: String): SparkNumberFormatException = { new SparkNumberFormatException(errorClass = "INVALID_SYNTAX_FOR_CAST", messageParameters = Array(toSQLType(to), toSQLValue(s, StringType), SQLConf.ANSI_ENABLED.key, errorContext)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala index 78b78f99ab0..8aef4c6f345 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.errors -import org.apache.spark.{SparkArithmeticException, SparkArrayIndexOutOfBoundsException, SparkConf, SparkDateTimeException, SparkNoSuchElementException} +import org.apache.spark.{SparkArithmeticException, SparkArrayIndexOutOfBoundsException, SparkConf, SparkDateTimeException, SparkNoSuchElementException, SparkNumberFormatException} import org.apache.spark.sql.QueryTest import org.apache.spark.sql.internal.SQLConf @@ -124,4 +124,19 @@ class QueryExecutionAnsiErrorsSuite extends QueryTest with QueryErrorsSuiteBase |""".stripMargin ) } + + test("INVALID_SYNTAX_FOR_CAST: cast string to double") { +checkErrorClass( + exception = intercept[SparkNumberFormatException] { +sql("select CAST('xe23' AS DOUBLE)").collect() + }, + errorClass = "INVALID_SYNTAX_FOR_CAST", + msg = """Invalid input syntax for type "DOUBLE": 'xe23'. """ + +"""To return NULL instead, use 'try_cast'. If necessary set """ + +"""spark.sql.ansi.enabled to false to bypass this error. 
+ |== SQL(line 1, position 7) == + |select CAST('xe23' AS DOUBLE) + | ^^ + |""".stripMargin) + } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38751][SQL][TESTS] Test the error class: UNRECOGNIZED_SQL_TYPE
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new bbf3a2eafa0 [SPARK-38751][SQL][TESTS] Test the error class: UNRECOGNIZED_SQL_TYPE bbf3a2eafa0 is described below commit bbf3a2eafa004f712799261ef883dcc457a072fd Author: panbingkun AuthorDate: Fri May 13 19:29:02 2022 +0300 [SPARK-38751][SQL][TESTS] Test the error class: UNRECOGNIZED_SQL_TYPE ## What changes were proposed in this pull request? This PR aims to add a test for the error class UNRECOGNIZED_SQL_TYPE to `QueryExecutionErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite*" ``` Closes #36463 from panbingkun/SPARK-38751. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../sql/errors/QueryExecutionErrorsSuite.scala | 89 +- 1 file changed, 86 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 7a5592c148a..cf1551298a8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -19,23 +19,27 @@ package org.apache.spark.sql.errors import java.io.IOException import java.net.URL -import java.util.{Locale, ServiceConfigurationError} +import java.sql.{Connection, DriverManager, PreparedStatement, ResultSet, ResultSetMetaData} +import java.util.{Locale, Properties, ServiceConfigurationError} import org.apache.hadoop.fs.{LocalFileSystem, Path} import org.apache.hadoop.fs.permission.FsPermission +import org.mockito.Mockito.{mock, when} import test.org.apache.spark.sql.connector.JavaSimpleWritableDataSource -import org.apache.spark.{SparkArithmeticException, SparkClassNotFoundException, SparkException, SparkIllegalArgumentException, SparkIllegalStateException, SparkRuntimeException, SparkSecurityException, SparkUnsupportedOperationException, SparkUpgradeException} +import org.apache.spark.{SparkArithmeticException, SparkClassNotFoundException, SparkException, SparkIllegalArgumentException, SparkIllegalStateException, SparkRuntimeException, SparkSecurityException, SparkSQLException, SparkUnsupportedOperationException, SparkUpgradeException} import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, SaveMode} import org.apache.spark.sql.catalyst.util.BadRecordException import org.apache.spark.sql.connector.SimpleWritableDataSource import org.apache.spark.sql.execution.QueryExecutionException +import org.apache.spark.sql.execution.datasources.jdbc.{DriverRegistry, JDBCOptions} import 
org.apache.spark.sql.execution.datasources.orc.OrcTest import org.apache.spark.sql.execution.datasources.parquet.ParquetTest import org.apache.spark.sql.functions.{lit, lower, struct, sum, udf} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy.EXCEPTION -import org.apache.spark.sql.types.{DecimalType, StructType, TimestampType} +import org.apache.spark.sql.jdbc.{JdbcDialect, JdbcDialects} +import org.apache.spark.sql.types.{DataType, DecimalType, MetadataBuilder, StructType, TimestampType} import org.apache.spark.sql.util.ArrowUtils import org.apache.spark.util.Utils @@ -514,6 +518,85 @@ class QueryExecutionErrorsSuite "META-INF/services/org.apache.spark.sql.sources.DataSourceRegister") } } + + test("UNRECOGNIZED_SQL_TYPE: unrecognized SQL type -100") { +Utils.classForName("org.h2.Driver") + +val properties = new Properties() +properties.setProperty("user", "testUser") +properties.setProperty("password", "testPass") + +val url = "jdbc:h2:mem:testdb0" +val urlWithUserAndPass = "jdbc:h2:mem:testdb0;user=testUser;password=testPass" +val tableName = "test.table1" +val unrecognizedColumnType = -100 + +var conn: java.sql.Connection = null +try { + conn = DriverManager.getConnection(url, properties) + conn.prepareStatement("create schema test").executeUpdate() + conn.commit() + + conn.prepareStatement(s"create table $tableName (a INT)").executeUpdate() + conn.prepareStatement( +s"insert into $tableName values (1)").executeUpdate() + conn.commit() +} finally { + if (null != conn) { +
[spark] branch branch-3.3 updated: [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new e743e68ce62 [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack e743e68ce62 is described below commit e743e68ce62e18ced6c49a22f5d101c72b7bfbe2 Author: Angerszh AuthorDate: Fri May 13 16:47:11 2022 +0300 [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack ### What changes were proposed in this pull request? One of our users hit a case when running broadcast where a `SparkFatalException` was thrown, but the error stack did not show the root cause. ### Why are the changes needed? Make the exception clearer. ### Does this PR introduce _any_ user-facing change? Users can now see the root cause when an application throws `SparkFatalException`. ### How was this patch tested? For ut ``` test("") { throw new SparkFatalException( new OutOfMemoryError("Not enough memory to build and broadcast the table to all " + "worker nodes. 
As a workaround, you can either disable broadcast by setting " + s"driver memory by setting ${SparkLauncher.DRIVER_MEMORY} to a higher value.") .initCause(null)) } ``` Before this pr: ``` [info] org.apache.spark.util.SparkFatalException: [info] at org.apache.spark.SparkContextSuite.$anonfun$new$1(SparkContextSuite.scala:59) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:203) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:200) [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:200) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:182) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:64) [info] at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) [info] at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:64) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:233) [info] at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) [info] at scala.collection.immutable.List.foreach(List.scala:431) [info] at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) [info] at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396) [info] at 
org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:233) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:232) [info] at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1563) [info] at org.scalatest.Suite.run(Suite.scala:1112) ``` After this pr: ``` [info] org.apache.spark.util.SparkFatalException: java.lang.OutOfMemoryError: Not enough memory to build and broadcast the table to all worker nodes. As a workaround, you can either disable broadcast by setting driver memory by setting spark.driver.memory to a higher value. [info] at org.apache.spark.SparkContextSuite.$anonfun$new$1(SparkContextSuite.scala:59) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:203) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest
[spark] branch master updated: [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d7317b03e97 [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack d7317b03e97 is described below commit d7317b03e975f8dc1a8c276dd0a931e00c478717 Author: Angerszh AuthorDate: Fri May 13 16:47:11 2022 +0300 [SPARK-39178][CORE] SparkFatalException should show root cause when print error stack ### What changes were proposed in this pull request? One of our users hit a case when running broadcast where a `SparkFatalException` was thrown, but the error stack did not show the root cause. ### Why are the changes needed? Make the exception clearer. ### Does this PR introduce _any_ user-facing change? Users can now see the root cause when an application throws `SparkFatalException`. ### How was this patch tested? For ut ``` test("") { throw new SparkFatalException( new OutOfMemoryError("Not enough memory to build and broadcast the table to all " + "worker nodes. 
As a workaround, you can either disable broadcast by setting " + s"driver memory by setting ${SparkLauncher.DRIVER_MEMORY} to a higher value.") .initCause(null)) } ``` Before this pr: ``` [info] org.apache.spark.util.SparkFatalException: [info] at org.apache.spark.SparkContextSuite.$anonfun$new$1(SparkContextSuite.scala:59) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:203) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:200) [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:200) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:182) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:64) [info] at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) [info] at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:64) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:233) [info] at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) [info] at scala.collection.immutable.List.foreach(List.scala:431) [info] at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401) [info] at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:396) [info] at 
org.scalatest.SuperEngine.runTestsImpl(Engine.scala:475) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests(AnyFunSuiteLike.scala:233) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTests$(AnyFunSuiteLike.scala:232) [info] at org.scalatest.funsuite.AnyFunSuite.runTests(AnyFunSuite.scala:1563) [info] at org.scalatest.Suite.run(Suite.scala:1112) ``` After this pr: ``` [info] org.apache.spark.util.SparkFatalException: java.lang.OutOfMemoryError: Not enough memory to build and broadcast the table to all worker nodes. As a workaround, you can either disable broadcast by setting driver memory by setting spark.driver.memory to a higher value. [info] at org.apache.spark.SparkContextSuite.$anonfun$new$1(SparkContextSuite.scala:59) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:190) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:203) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:188) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest
[spark] branch branch-3.3 updated: [SPARK-39164][SQL][3.3] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 1372f312052 [SPARK-39164][SQL][3.3] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions 1372f312052 is described below commit 1372f312052dd0361e371e2ed63436f3e299c617 Author: Max Gekk AuthorDate: Fri May 13 16:43:53 2022 +0300 [SPARK-39164][SQL][3.3] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions ### What changes were proposed in this pull request? In the PR, I propose to catch `java.lang.IllegalStateException` and `java.lang.AssertionError` (raised by asserts), and wrap them by Spark's exception w/ the `INTERNAL_ERROR` error class. The modification affects only actions so far. This PR affects the case of missing bucket file. After the changes, Spark throws `SparkException` w/ `INTERNAL_ERROR` instead of `IllegalStateException`. Since this is not Spark's illegal state, the exception should be replaced by another runtime exception. Created the ticket SPARK-39163 to fix this. This is a backport of https://github.com/apache/spark/pull/36500. ### Why are the changes needed? To improve user experience with Spark SQL and unify representation of internal errors by using error classes like for other errors. Usually, users shouldn't observe asserts and illegal states, but even if such situation happens, they should see errors in the same way as other errors (w/ error class `INTERNAL_ERROR`). ### Does this PR introduce _any_ user-facing change? Yes. At least, in one particular case, see the modified test suites and SPARK-39163. ### How was this patch tested? 
By running the affected test suites: ``` $ build/sbt "test:testOnly *.BucketedReadWithoutHiveSupportSuite" $ build/sbt "test:testOnly *.AdaptiveQueryExecSuite" $ build/sbt "test:testOnly *.WholeStageCodegenSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit f5c3f0c228fef7808d1f927e134595ddd4d31723) Signed-off-by: Max Gekk Closes #36533 from MaxGekk/class-internal-error-3.3. Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../main/scala/org/apache/spark/sql/Dataset.scala | 21 - .../spark/sql/execution/DataSourceScanExec.scala| 1 + .../org/apache/spark/sql/execution/subquery.scala | 1 + .../scala/org/apache/spark/sql/SubquerySuite.scala | 10 ++ .../sql/execution/WholeStageCodegenSuite.scala | 14 -- .../execution/adaptive/AdaptiveQueryExecSuite.scala | 9 ++--- .../spark/sql/sources/BucketedReadSuite.scala | 8 +--- 7 files changed, 43 insertions(+), 21 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 7d16a2f5eee..56f0e8978ec 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils -import org.apache.spark.TaskContext +import org.apache.spark.{SparkException, SparkThrowable, TaskContext} import org.apache.spark.annotation.{DeveloperApi, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.function._ @@ -3848,12 +3848,23 @@ class Dataset[T] private[sql]( /** * Wrap a Dataset action to track the QueryExecution and time cost, then report to the - * user-registered callback functions. + * user-registered callback functions, and also to convert asserts/illegal states to + * the internal error exception. 
*/ private def withAction[U](name: String, qe: QueryExecution)(action: SparkPlan => U) = { -SQLExecution.withNewExecutionId(qe, Some(name)) { - qe.executedPlan.resetMetrics() - action(qe.executedPlan) +try { + SQLExecution.withNewExecutionId(qe, Some(name)) { +qe.executedPlan.resetMetrics() +action(qe.executedPlan) + } +} catch { + case e: SparkThrowable => throw e + case e @ (_: java.lang.IllegalStateException | _: java.lang.AssertionError) => +throw new SparkException( + errorClass = "INTERNAL_ERROR", + messageParameters = Array(s"""The "$name" action failed."""), + cause = e) + case e: Throwable => throw e } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index ac0f3af5725..1ec93a614b7 100644 ---
[spark] branch branch-3.3 updated: [SPARK-39165][SQL][3.3] Replace `sys.error` by `IllegalStateException`
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new c2bd7bac76a [SPARK-39165][SQL][3.3] Replace `sys.error` by `IllegalStateException` c2bd7bac76a is described below commit c2bd7bac76a5cf7ffc5ef61a1df2b8bb5a72f131 Author: Max Gekk AuthorDate: Fri May 13 12:47:53 2022 +0300 [SPARK-39165][SQL][3.3] Replace `sys.error` by `IllegalStateException` ### What changes were proposed in this pull request? Replace all invocations of `sys.error()` with throwing `IllegalStateException` in the `sql` namespace. This is a backport of https://github.com/apache/spark/pull/36524. ### Why are the changes needed? In the context of wrapping all internal errors like asserts/illegal state exceptions (see https://github.com/apache/spark/pull/36500), it is impossible to distinguish the `RuntimeException` of `sys.error()` from Spark's exceptions like `SparkRuntimeException`. The latter can be propagated to the user space, but `sys.error` exceptions shouldn't be visible to users in regular cases. ### Does this PR introduce _any_ user-facing change? No, it shouldn't. `sys.error` exceptions shouldn't propagate to user space in regular cases. ### How was this patch tested? By running the existing test suites. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 95c7efd7571464d8adfb76fb22e47a5816cf73fb) Signed-off-by: Max Gekk Closes #36532 from MaxGekk/sys_error-internal-3.3. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../scala/org/apache/spark/sql/execution/SparkStrategies.scala| 4 ++-- .../org/apache/spark/sql/execution/datasources/DataSource.scala | 8 .../sql/execution/datasources/parquet/ParquetWriteSupport.scala | 3 +-- .../apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala | 4 ++-- .../org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala | 5 +++-- .../scala/org/apache/spark/sql/execution/streaming/memory.scala | 3 ++- .../execution/streaming/sources/TextSocketMicroBatchStream.scala | 3 ++- .../src/main/scala/org/apache/spark/sql/execution/subquery.scala | 3 ++- .../apache/spark/sql/execution/window/AggregateProcessor.scala| 2 +- .../org/apache/spark/sql/execution/window/WindowExecBase.scala| 8 .../src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 3 ++- .../scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala | 2 +- 12 files changed, 26 insertions(+), 22 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 3b8a70ffe94..17f3cfbda89 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -503,8 +503,8 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { _.aggregateFunction.children.filterNot(_.foldable).toSet).distinct.length > 1) { // This is a sanity check. We should not reach here when we have multiple distinct // column sets. Our `RewriteDistinctAggregates` should take care this case. - sys.error("You hit a query analyzer bug. Please report your query to " + - "Spark user mailing list.") + throw new IllegalStateException( +"You hit a query analyzer bug. 
Please report your query to Spark user mailing list.") } // Ideally this should be done in `NormalizeFloatingNumbers`, but we do it here because diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala index 2bb3d48c145..143fb4cf960 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala @@ -539,8 +539,8 @@ case class DataSource( DataWritingCommand.propogateMetrics(sparkSession.sparkContext, resolved, metrics) // Replace the schema with that of the DataFrame we just wrote out to avoid re-inferring copy(userSpecifiedSchema = Some(outputColumns.toStructType.asNullable)).resolveRelation() - case _ => -sys.error(s"${providingClass.getCanonicalName} does not allow create table as select.") + case _ => throw new IllegalStateException( +s"${providingClass.getCanonicalName} does not allow create table as select.") } } @@ -556,8 +556,8 @@ case class DataSource( dis
[spark] branch master updated: [SPARK-39164][SQL] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f5c3f0c228f [SPARK-39164][SQL] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions f5c3f0c228f is described below commit f5c3f0c228fef7808d1f927e134595ddd4d31723 Author: Max Gekk AuthorDate: Thu May 12 23:54:19 2022 +0300 [SPARK-39164][SQL] Wrap asserts/illegal state exceptions by the INTERNAL_ERROR exception in actions ### What changes were proposed in this pull request? In the PR, I propose to catch `java.lang.IllegalStateException` and `java.lang.AssertionError` (raised by asserts), and wrap them by Spark's exception w/ the `INTERNAL_ERROR` error class. The modification affects only actions so far. This PR affects the case of missing bucket file. After the changes, Spark throws `SparkException` w/ `INTERNAL_ERROR` instead of `IllegalStateException`. Since this is not Spark's illegal state, the exception should be replaced by another runtime exception. Created the ticket SPARK-39163 to fix this. ### Why are the changes needed? To improve user experience with Spark SQL and unify representation of internal errors by using error classes like for other errors. Usually, users shouldn't observe asserts and illegal states, but even if such situation happens, they should see errors in the same way as other errors (w/ error class `INTERNAL_ERROR`). ### Does this PR introduce _any_ user-facing change? Yes. At least, in one particular case, see the modified test suites and SPARK-39163. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *.BucketedReadWithoutHiveSupportSuite" $ build/sbt "test:testOnly *.AdaptiveQueryExecSuite" $ build/sbt "test:testOnly *.WholeStageCodegenSuite" ``` Closes #36500 from MaxGekk/class-internal-error. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- .../main/scala/org/apache/spark/sql/Dataset.scala | 21 - .../spark/sql/execution/DataSourceScanExec.scala| 1 + .../org/apache/spark/sql/execution/subquery.scala | 1 + .../scala/org/apache/spark/sql/SubquerySuite.scala | 10 ++ .../sql/execution/WholeStageCodegenSuite.scala | 14 -- .../execution/adaptive/AdaptiveQueryExecSuite.scala | 9 ++--- .../spark/sql/sources/BucketedReadSuite.scala | 8 +--- 7 files changed, 43 insertions(+), 21 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 36b6d6b470d..8c89ec795de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -27,7 +27,7 @@ import scala.util.control.NonFatal import org.apache.commons.lang3.StringUtils -import org.apache.spark.TaskContext +import org.apache.spark.{SparkException, SparkThrowable, TaskContext} import org.apache.spark.annotation.{DeveloperApi, Stable, Unstable} import org.apache.spark.api.java.JavaRDD import org.apache.spark.api.java.function._ @@ -3906,12 +3906,23 @@ class Dataset[T] private[sql]( /** * Wrap a Dataset action to track the QueryExecution and time cost, then report to the - * user-registered callback functions. + * user-registered callback functions, and also to convert asserts/illegal states to + * the internal error exception. 
*/ private def withAction[U](name: String, qe: QueryExecution)(action: SparkPlan => U) = { -SQLExecution.withNewExecutionId(qe, Some(name)) { - qe.executedPlan.resetMetrics() - action(qe.executedPlan) +try { + SQLExecution.withNewExecutionId(qe, Some(name)) { +qe.executedPlan.resetMetrics() +action(qe.executedPlan) + } +} catch { + case e: SparkThrowable => throw e + case e @ (_: java.lang.IllegalStateException | _: java.lang.AssertionError) => +throw new SparkException( + errorClass = "INTERNAL_ERROR", + messageParameters = Array(s"""The "$name" action failed."""), + cause = e) + case e: Throwable => throw e } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala index 9141a3f742e..f7b627cef08 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala @@ -618,6 +618,7 @@ case class FileSourceScanExec( }.groupBy { f =>
[spark] branch master updated (c74506cc33b -> 95c7efd7571)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from c74506cc33b [SPARK-39086][SQL] Support UDT in Spark Parquet vectorized reader add 95c7efd7571 [SPARK-39165][SQL] Replace `sys.error` by `IllegalStateException` No new revisions were added by this update. Summary of changes: .../scala/org/apache/spark/sql/execution/SparkStrategies.scala| 4 ++-- .../org/apache/spark/sql/execution/datasources/DataSource.scala | 8 .../sql/execution/datasources/parquet/ParquetWriteSupport.scala | 3 +-- .../apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala | 4 ++-- .../org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala | 5 +++-- .../scala/org/apache/spark/sql/execution/streaming/memory.scala | 3 ++- .../execution/streaming/sources/TextSocketMicroBatchStream.scala | 3 ++- .../src/main/scala/org/apache/spark/sql/execution/subquery.scala | 3 ++- .../apache/spark/sql/execution/window/AggregateProcessor.scala| 2 +- .../org/apache/spark/sql/execution/window/WindowExecBase.scala| 8 .../src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala | 3 ++- .../scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala | 2 +- 12 files changed, 26 insertions(+), 22 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.2 updated: [SPARK-39060][SQL][3.2] Typo in error messages of decimal overflow
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new 6f9e3034ada [SPARK-39060][SQL][3.2] Typo in error messages of decimal overflow 6f9e3034ada is described below commit 6f9e3034ada72f372dafe93152e01ad5cb323989 Author: Vitalii Li AuthorDate: Thu May 12 08:13:51 2022 +0300 [SPARK-39060][SQL][3.2] Typo in error messages of decimal overflow ### What changes were proposed in this pull request? This PR removes extra curly bracket from debug string for Decimal type in SQL. This is a backport from master branch. Commit: https://github.com/apache/spark/commit/165ce4eb7d6d75201beb1bff879efa99fde24f94 ### Why are the changes needed? Typo in error messages of decimal overflow. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By running tests: ``` $ build/sbt "sql/testOnly" ``` Closes #36458 from vli-databricks/SPARK-39060-3.2. 
Authored-by: Vitalii Li Signed-off-by: Max Gekk --- .../src/main/scala/org/apache/spark/sql/types/Decimal.scala | 4 ++-- .../sql-tests/results/ansi/decimalArithmeticOperations.sql.out| 8 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 46814297231..bc5fba8d0d8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -227,9 +227,9 @@ final class Decimal extends Ordered[Decimal] with Serializable { def toDebugString: String = { if (decimalVal.ne(null)) { - s"Decimal(expanded,$decimalVal,$precision,$scale})" + s"Decimal(expanded, $decimalVal, $precision, $scale)" } else { - s"Decimal(compact,$longVal,$precision,$scale})" + s"Decimal(compact, $longVal, $precision, $scale)" } } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out index 2f3513e734f..c65742e4d8b 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out @@ -76,7 +76,7 @@ select (5e36BD + 0.1) + 5e36BD struct<> -- !query output java.lang.ArithmeticException -Decimal(expanded,10.1,39,1}) cannot be represented as Decimal(38, 1). +Decimal(expanded, 10.1, 39, 1) cannot be represented as Decimal(38, 1). -- !query @@ -85,7 +85,7 @@ select (-4e36BD - 0.1) - 7e36BD struct<> -- !query output java.lang.ArithmeticException -Decimal(expanded,-11.1,39,1}) cannot be represented as Decimal(38, 1). +Decimal(expanded, -11.1, 39, 1) cannot be represented as Decimal(38, 1). 
-- !query @@ -94,7 +94,7 @@ select 12345678901234567890.0 * 12345678901234567890.0 struct<> -- !query output java.lang.ArithmeticException -Decimal(expanded,152415787532388367501905199875019052100,39,0}) cannot be represented as Decimal(38, 2). +Decimal(expanded, 152415787532388367501905199875019052100, 39, 0) cannot be represented as Decimal(38, 2). -- !query @@ -103,7 +103,7 @@ select 1e35BD / 0.1 struct<> -- !query output java.lang.ArithmeticException -Decimal(expanded,1,37,0}) cannot be represented as Decimal(38, 6). +Decimal(expanded, 1, 37, 0) cannot be represented as Decimal(38, 6). -- !query - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 6378365011c [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc 6378365011c is described below commit 6378365011c590d7e7225ea05728bfe06490e769 Author: Yikun Jiang AuthorDate: Sat May 7 10:19:53 2022 +0300 [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc ### What changes were proposed in this pull request? Fix format error on running-on-kubernetes doc ### Why are the changes needed? Fix format syntax error ### Does this PR introduce _any_ user-facing change? No, unreleased doc only ### How was this patch tested? - `SKIP_API=1 bundle exec jekyll serve --watch` - CI passed Closes #36476 from Yikun/SPARK-39121. Authored-by: Yikun Jiang Signed-off-by: Max Gekk (cherry picked from commit 2349f74866ae1b365b5e4e0ec8a58c4f7f06885c) Signed-off-by: Max Gekk --- docs/running-on-kubernetes.md | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 763a9668d3b..ee77e37beb3 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1699,7 +1699,7 @@ Kubernetes supports [Pod priority](https://kubernetes.io/docs/concepts/schedulin Spark on Kubernetes allows defining the priority of jobs by [Pod template](#pod-template). The user can specify the priorityClassName in driver or executor Pod template spec section. Below is an example to show how to specify it: -``` +```yaml apiVersion: v1 Kind: Pod metadata: @@ -1729,8 +1729,8 @@ Spark allows users to specify a custom Kubernetes schedulers. 3. Specify scheduler feature step. 
Users may also consider to use spark.kubernetes.{driver/executor}.pod.featureSteps to support more complex requirements, including but not limited to: - - Create additional Kubernetes custom resources for driver/executor scheduling. - - Set scheduler hints according to configuration or existing Pod info dynamically. + - Create additional Kubernetes custom resources for driver/executor scheduling. + - Set scheduler hints according to configuration or existing Pod info dynamically. Using Volcano as Customized Scheduler for Spark on Kubernetes @@ -1766,7 +1766,7 @@ To use Volcano as a custom scheduler the user needs to specify the following con --conf spark.kubernetes.scheduler.volcano.podGroupTemplateFile=/path/to/podgroup-template.yaml # Specify driver/executor VolcanoFeatureStep --conf spark.kubernetes.driver.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep ---conf spark.kubernetes.executor.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep``` +--conf spark.kubernetes.executor.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep ``` # Volcano Feature Step - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 2349f74866a [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc 2349f74866a is described below commit 2349f74866ae1b365b5e4e0ec8a58c4f7f06885c Author: Yikun Jiang AuthorDate: Sat May 7 10:19:53 2022 +0300 [SPARK-39121][K8S][DOCS] Fix format error on running-on-kubernetes doc ### What changes were proposed in this pull request? Fix format error on running-on-kubernetes doc ### Why are the changes needed? Fix format syntax error ### Does this PR introduce _any_ user-facing change? No, unreleased doc only ### How was this patch tested? - `SKIP_API=1 bundle exec jekyll serve --watch` - CI passed Closes #36476 from Yikun/SPARK-39121. Authored-by: Yikun Jiang Signed-off-by: Max Gekk --- docs/running-on-kubernetes.md | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index f5f2465fb06..c8c202360f8 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -1699,7 +1699,7 @@ Kubernetes supports [Pod priority](https://kubernetes.io/docs/concepts/schedulin Spark on Kubernetes allows defining the priority of jobs by [Pod template](#pod-template). The user can specify the priorityClassName in driver or executor Pod template spec section. Below is an example to show how to specify it: -``` +```yaml apiVersion: v1 Kind: Pod metadata: @@ -1729,8 +1729,8 @@ Spark allows users to specify a custom Kubernetes schedulers. 3. Specify scheduler feature step. Users may also consider to use spark.kubernetes.{driver/executor}.pod.featureSteps to support more complex requirements, including but not limited to: - - Create additional Kubernetes custom resources for driver/executor scheduling. 
- - Set scheduler hints according to configuration or existing Pod info dynamically. + - Create additional Kubernetes custom resources for driver/executor scheduling. + - Set scheduler hints according to configuration or existing Pod info dynamically. Using Volcano as Customized Scheduler for Spark on Kubernetes @@ -1766,7 +1766,7 @@ To use Volcano as a custom scheduler the user needs to specify the following con --conf spark.kubernetes.scheduler.volcano.podGroupTemplateFile=/path/to/podgroup-template.yaml # Specify driver/executor VolcanoFeatureStep --conf spark.kubernetes.driver.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep ---conf spark.kubernetes.executor.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep``` +--conf spark.kubernetes.executor.pod.featureSteps=org.apache.spark.deploy.k8s.features.VolcanoFeatureStep ``` # Volcano Feature Step - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39117][SQL][TESTS] Do not include number of functions in sql-expression-schema.md
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 986b0f769b8 [SPARK-39117][SQL][TESTS] Do not include number of functions in sql-expression-schema.md 986b0f769b8 is described below commit 986b0f769b8ffa8a033d0f182217e83faa38fb4a Author: Wenchen Fan AuthorDate: Fri May 6 20:43:36 2022 +0300 [SPARK-39117][SQL][TESTS] Do not include number of functions in sql-expression-schema.md ### What changes were proposed in this pull request? `sql-expression-schema.md` is a golden file for tracking purposes: whenever we change a function or add a new function, this file must be updated. However, the number of functions in this file is not very useful and stops people from adding functions at the same time. This PR prints the summary information during test instead of putting it in the golden file. ### Why are the changes needed? Increase development velocity. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? N/A Closes #36472 from cloud-fan/small. 
Authored-by: Wenchen Fan Signed-off-by: Max Gekk --- .../sql-functions/sql-expression-schema.md | 4 -- .../apache/spark/sql/ExpressionsSchemaSuite.scala | 48 +- 2 files changed, 11 insertions(+), 41 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index accf9ea4577..0115578e909 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,8 +1,4 @@ -## Summary - - Number of queries: 390 - - Number of expressions that missing example: 12 - - Expressions missing examples: bigint,binary,boolean,date,decimal,double,float,int,smallint,string,timestamp,tinyint ## Schema of Built-in Functions | Class name | Function name or alias | Query example | Output schema | | -- | -- | - | - | diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index f8071e6cda1..d6ef90ce0b7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -133,10 +133,6 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { val header = Seq( s"", - "## Summary", - s" - Number of queries: ${outputs.size}", - s" - Number of expressions that missing example: ${missingExamples.size}", - s" - Expressions missing examples: ${missingExamples.mkString(",")}", "## Schema of Built-in Functions", "| Class name | Function name or alias | Query example | Output schema |", "| -- | -- | - | - |" @@ -149,11 +145,20 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { assert(parent.mkdirs(), "Could not create directory: " + parent) } stringToFile(resultFile, goldenOutput) + // scalastyle:off println + println( +s""" + |## Summary + | - Number of queries: 
${outputs.size} + | - Number of expressions that missing example: ${missingExamples.size} + | - Expressions missing examples: ${missingExamples.mkString(",")} + |""".stripMargin) + // scalastyle:on println } val outputSize = outputs.size val headerSize = header.size -val (expectedMissingExamples, expectedOutputs) = { +val expectedOutputs = { val expectedGoldenOutput = fileToString(resultFile) val lines = expectedGoldenOutput.split("\n") val expectedSize = lines.size @@ -162,8 +167,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { s"Expected $expectedSize blocks in result file but got " + s"${outputSize + headerSize}. Try regenerating the result files.") - val numberOfQueries = lines(2).split(":")(1).trim.toInt - val expectedOutputs = Seq.tabulate(outputSize) { i => + Seq.tabulate(outputSize) { i => val segments = lines(i + headerSize).split('|') QueryOutput( className = segments(1).trim, @@ -171,28 +175,6 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { sql = segments(3).trim, s
[spark] branch branch-3.0 updated (4e38563d39c -> 19942e7be86)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git from 4e38563d39c [SPARK-38918][SQL][3.0] Nested column pruning should filter out attributes that do not belong to the current relation add 19942e7be86 [SPARK-39060][SQL][3.0] Typo in error messages of decimal overflow No new revisions were added by this update. Summary of changes: .../src/main/scala/org/apache/spark/sql/types/Decimal.scala | 4 ++-- .../sql-tests/results/ansi/decimalArithmeticOperations.sql.out| 8 .../src/test/resources/sql-tests/results/ansi/interval.sql.out| 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.1 updated (8f6a3a50b4b -> 19576c412b7)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git from 8f6a3a50b4b [SPARK-39084][PYSPARK] Fix df.rdd.isEmpty() by using TaskContext to stop iterator on task completion add 19576c412b7 [SPARK-39060][SQL][3.1] Typo in error messages of decimal overflow No new revisions were added by this update. Summary of changes: .../src/main/scala/org/apache/spark/sql/types/Decimal.scala | 4 ++-- .../sql-tests/results/ansi/decimalArithmeticOperations.sql.out| 8 .../src/test/resources/sql-tests/results/ansi/interval.sql.out| 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39108][SQL] Show hints for try_add/try_subtract/try_multiply in int/long overflow errors
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c274812284a [SPARK-39108][SQL] Show hints for try_add/try_substract/try_multiply in int/long overflow errors c274812284a is described below commit c274812284a3b7ec725e6b8afc2e7ab0f91b923e Author: Gengliang Wang AuthorDate: Thu May 5 23:03:44 2022 +0300 [SPARK-39108][SQL] Show hints for try_add/try_substract/try_multiply in int/long overflow errors ### What changes were proposed in this pull request? Show hints for try_add/try_substract/try_multiply in int/long overflow errors ### Why are the changes needed? Better error message for resolving the overflow errors under ANSI mode. ### Does this PR introduce _any_ user-facing change? No, minor error message improvement ### How was this patch tested? UT Closes #36456 from gengliangwang/tryHint. Authored-by: Gengliang Wang Signed-off-by: Max Gekk --- .../scala/org/apache/spark/sql/catalyst/util/MathUtils.scala | 12 ++-- .../test/resources/sql-tests/results/postgreSQL/int4.sql.out | 12 ++-- .../test/resources/sql-tests/results/postgreSQL/int8.sql.out | 8 .../sql-tests/results/postgreSQL/window_part2.sql.out| 4 ++-- 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MathUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MathUtils.scala index f96c9fba5a3..e5c87a41ea8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MathUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/MathUtils.scala @@ -27,32 +27,32 @@ object MathUtils { def addExact(a: Int, b: Int): Int = withOverflow(Math.addExact(a, b)) def addExact(a: Int, b: Int, errorContext: String): Int = -withOverflow(Math.addExact(a, b), errorContext = errorContext) 
+withOverflow(Math.addExact(a, b), hint = "try_add", errorContext = errorContext) def addExact(a: Long, b: Long): Long = withOverflow(Math.addExact(a, b)) def addExact(a: Long, b: Long, errorContext: String): Long = -withOverflow(Math.addExact(a, b), errorContext = errorContext) +withOverflow(Math.addExact(a, b), hint = "try_add", errorContext = errorContext) def subtractExact(a: Int, b: Int): Int = withOverflow(Math.subtractExact(a, b)) def subtractExact(a: Int, b: Int, errorContext: String): Int = -withOverflow(Math.subtractExact(a, b), errorContext = errorContext) +withOverflow(Math.subtractExact(a, b), hint = "try_subtract", errorContext = errorContext) def subtractExact(a: Long, b: Long): Long = withOverflow(Math.subtractExact(a, b)) def subtractExact(a: Long, b: Long, errorContext: String): Long = -withOverflow(Math.subtractExact(a, b), errorContext = errorContext) +withOverflow(Math.subtractExact(a, b), hint = "try_subtract", errorContext = errorContext) def multiplyExact(a: Int, b: Int): Int = withOverflow(Math.multiplyExact(a, b)) def multiplyExact(a: Int, b: Int, errorContext: String): Int = -withOverflow(Math.multiplyExact(a, b), errorContext = errorContext) +withOverflow(Math.multiplyExact(a, b), hint = "try_multiply", errorContext = errorContext) def multiplyExact(a: Long, b: Long): Long = withOverflow(Math.multiplyExact(a, b)) def multiplyExact(a: Long, b: Long, errorContext: String): Long = -withOverflow(Math.multiplyExact(a, b), errorContext = errorContext) +withOverflow(Math.multiplyExact(a, b), hint = "try_multiply", errorContext = errorContext) def negateExact(a: Int): Int = withOverflow(Math.negateExact(a)) diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out index 6b42e31340f..a39cdbc340c 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/int4.sql.out 
@@ -200,7 +200,7 @@ SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT4_TBL i struct<> -- !query output org.apache.spark.SparkArithmeticException -[ARITHMETIC_OVERFLOW] integer overflow. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. +[ARITHMETIC_OVERFLOW] integer overflow. To return NULL instead, use 'try_multiply'. If necessary set spark.sql.ansi.enabled to false (except for ANSI interval type) to bypass this error. == SQL(line 1, position 25) == SELECT '' AS five, i.f1, i.f1 * smallint('2') AS x FROM INT4_TBL i
[spark] branch master updated: [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 4b1c2fb7a27 [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases 4b1c2fb7a27 is described below commit 4b1c2fb7a27757ebf470416c8ec02bb5c1f7fa49 Author: Max Gekk AuthorDate: Thu May 5 20:10:06 2022 +0300 [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases ### What changes were proposed in this pull request? Add missed dependencies to `dev/create-release/spark-rm/Dockerfile`. ### Why are the changes needed? To be able to build Spark releases. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By building the Spark 3.3 release via: ``` $ dev/create-release/do-release-docker.sh -d /home/ubuntu/max/spark-3.3-rc1 ``` Closes #36449 from MaxGekk/deps-Dockerfile. Authored-by: Max Gekk Signed-off-by: Max Gekk --- dev/create-release/spark-rm/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index ffd60c07af0..c6555e0463d 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -42,7 +42,7 @@ ARG APT_INSTALL="apt-get install --no-install-recommends -y" # We should use the latest Sphinx version once this is fixed. # TODO(SPARK-35375): Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. 
-ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3 twine==3.4.1 sphinx-plotly-directive==0.1.3 pandas==1.1.5 pyarrow==3.0.0 plotly==5.4.0" +ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3 twine==3.4.1 sphinx-plotly-directive==0.1.3 pandas==1.1.5 pyarrow==3.0.0 plotly==5.4.0 markupsafe==2.0.1 docutils<0.17" ARG GEM_PKGS="bundler:2.2.9" # Install extra needed repos and refresh. @@ -79,9 +79,9 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ # Note that PySpark doc generation also needs pandoc due to nbsphinx $APT_INSTALL r-base r-base-dev && \ $APT_INSTALL libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev && \ - $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf && \ + $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf texlive-latex-extra && \ $APT_INSTALL libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev && \ - Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ + Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ Rscript -e "devtools::install_github('jimhester/lintr')" && \ Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" && \ Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" && \ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 6a61f95a359 [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases 6a61f95a359 is described below commit 6a61f95a359e6aa9d09f8044019074dc7effcf30 Author: Max Gekk AuthorDate: Thu May 5 20:10:06 2022 +0300 [SPARK-39099][BUILD] Add dependencies to Dockerfile for building Spark releases ### What changes were proposed in this pull request? Add missed dependencies to `dev/create-release/spark-rm/Dockerfile`. ### Why are the changes needed? To be able to build Spark releases. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By building the Spark 3.3 release via: ``` $ dev/create-release/do-release-docker.sh -d /home/ubuntu/max/spark-3.3-rc1 ``` Closes #36449 from MaxGekk/deps-Dockerfile. Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 4b1c2fb7a27757ebf470416c8ec02bb5c1f7fa49) Signed-off-by: Max Gekk --- dev/create-release/spark-rm/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/create-release/spark-rm/Dockerfile b/dev/create-release/spark-rm/Dockerfile index ffd60c07af0..c6555e0463d 100644 --- a/dev/create-release/spark-rm/Dockerfile +++ b/dev/create-release/spark-rm/Dockerfile @@ -42,7 +42,7 @@ ARG APT_INSTALL="apt-get install --no-install-recommends -y" # We should use the latest Sphinx version once this is fixed. # TODO(SPARK-35375): Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. 
-ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3 twine==3.4.1 sphinx-plotly-directive==0.1.3 pandas==1.1.5 pyarrow==3.0.0 plotly==5.4.0" +ARG PIP_PKGS="sphinx==3.0.4 mkdocs==1.1.2 numpy==1.19.4 pydata_sphinx_theme==0.4.1 ipython==7.19.0 nbsphinx==0.8.0 numpydoc==1.1.0 jinja2==2.11.3 twine==3.4.1 sphinx-plotly-directive==0.1.3 pandas==1.1.5 pyarrow==3.0.0 plotly==5.4.0 markupsafe==2.0.1 docutils<0.17" ARG GEM_PKGS="bundler:2.2.9" # Install extra needed repos and refresh. @@ -79,9 +79,9 @@ RUN apt-get clean && apt-get update && $APT_INSTALL gnupg ca-certificates && \ # Note that PySpark doc generation also needs pandoc due to nbsphinx $APT_INSTALL r-base r-base-dev && \ $APT_INSTALL libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev && \ - $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf && \ + $APT_INSTALL texlive-latex-base texlive texlive-fonts-extra texinfo qpdf texlive-latex-extra && \ $APT_INSTALL libfontconfig1-dev libharfbuzz-dev libfribidi-dev libfreetype6-dev libpng-dev libtiff5-dev libjpeg-dev && \ - Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ + Rscript -e "install.packages(c('curl', 'xml2', 'httr', 'devtools', 'testthat', 'knitr', 'rmarkdown', 'markdown', 'roxygen2', 'e1071', 'survival'), repos='https://cloud.r-project.org/')" && \ Rscript -e "devtools::install_github('jimhester/lintr')" && \ Rscript -e "devtools::install_version('pkgdown', version='2.0.1', repos='https://cloud.r-project.org')" && \ Rscript -e "devtools::install_version('preferably', version='0.4', repos='https://cloud.r-project.org')" && \ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR] Remove unused import
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new bf447046327 [MINOR] Remove unused import bf447046327 is described below commit bf447046327b80f176fd638db418d0513b9c2516 Author: panbingkun AuthorDate: Thu May 5 19:25:32 2022 +0300 [MINOR] Remove unused import ### What changes were proposed in this pull request? Remove unused import in `numerics`. ### Why are the changes needed? Cleanup ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #36454 from panbingkun/minor. Authored-by: panbingkun Signed-off-by: Max Gekk --- sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala index fea792f08d0..c3d893d82fc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/numerics.scala @@ -18,7 +18,6 @@ package org.apache.spark.sql.types import scala.math.Numeric._ -import scala.math.Ordering import org.apache.spark.sql.catalyst.util.{MathUtils, SQLOrderingUtil} import org.apache.spark.sql.errors.QueryExecutionErrors - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-37938][SQL][TESTS] Use error classes in the parsing errors of partitions
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 29ff671933e [SPARK-37938][SQL][TESTS] Use error classes in the parsing errors of partitions 29ff671933e is described below commit 29ff671933e3b432e69a26761bc79856f21b82c7 Author: panbingkun AuthorDate: Thu May 5 19:22:28 2022 +0300 [SPARK-37938][SQL][TESTS] Use error classes in the parsing errors of partitions ## What changes were proposed in this pull request? Migrate the following errors in QueryParsingErrors onto use error classes: - emptyPartitionKeyError => INVALID_SQL_SYNTAX - partitionTransformNotExpectedError => INVALID_SQL_SYNTAX - descColumnForPartitionUnsupportedError => UNSUPPORTED_FEATURE.DESC_TABLE_COLUMN_PARTITION - incompletePartitionSpecificationError => INVALID_SQL_SYNTAX ### Why are the changes needed? Porting parsing errors of partitions to new error framework, improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryParsingErrorsSuite*" ``` Closes #36416 from panbingkun/SPARK-37938. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 3 ++ .../spark/sql/errors/QueryParsingErrors.scala | 22 ++-- .../spark/sql/catalyst/parser/DDLParserSuite.scala | 2 +- .../resources/sql-tests/results/describe.sql.out | 2 +- .../spark/sql/errors/QueryErrorsSuiteBase.scala| 16 -- .../spark/sql/errors/QueryParsingErrorsSuite.scala | 60 ++ .../command/ShowPartitionsParserSuite.scala| 22 +--- .../command/TruncateTableParserSuite.scala | 21 +--- 8 files changed, 125 insertions(+), 23 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 24b50c4209a..3a7bc757f73 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -206,6 +206,9 @@ "AES_MODE" : { "message" : [ "AES- with the padding by the function." ] }, + "DESC_TABLE_COLUMN_PARTITION" : { +"message" : [ "DESC TABLE COLUMN for a specific partition." ] + }, "DISTRIBUTE_BY" : { "message" : [ "DISTRIBUTE BY clause." 
] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index ed5773f4f82..1d15557c9d0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -77,7 +77,11 @@ object QueryParsingErrors extends QueryErrorsBase { } def emptyPartitionKeyError(key: String, ctx: PartitionSpecContext): Throwable = { -new ParseException(s"Found an empty partition key '$key'.", ctx) +new ParseException( + errorClass = "INVALID_SQL_SYNTAX", + messageParameters = +Array(s"Partition key ${toSQLId(key)} must set value (can't be empty)."), + ctx) } def combinationQueryResultClausesUnsupportedError(ctx: QueryOrganizationContext): Throwable = { @@ -243,7 +247,11 @@ object QueryParsingErrors extends QueryErrorsBase { def partitionTransformNotExpectedError( name: String, describe: String, ctx: ApplyTransformContext): Throwable = { -new ParseException(s"Expected a column reference for transform $name: $describe", ctx) +new ParseException( + errorClass = "INVALID_SQL_SYNTAX", + messageParameters = +Array(s"Expected a column reference for transform ${toSQLId(name)}: $describe"), + ctx) } def tooManyArgumentsForTransformError(name: String, ctx: ApplyTransformContext): Throwable = { @@ -298,12 +306,18 @@ object QueryParsingErrors extends QueryErrorsBase { } def descColumnForPartitionUnsupportedError(ctx: DescribeRelationContext): Throwable = { -new ParseException("DESC TABLE COLUMN for a specific partition is not supported", ctx) +new ParseException( + errorClass = "UNSUPPORTED_FEATURE", + messageParameters = Array("DESC_TABLE_COLUMN_PARTITION"), + ctx) } def incompletePartitionSpecificationError( key: String, ctx: DescribeRelationContext): Throwable = { -new ParseException(s"PARTITION specifi
svn commit: r54275 - in /dev/spark/v3.3.0-rc1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.0/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: maxgekk Date: Thu May 5 08:51:39 2022 New Revision: 54275 Log: Apache Spark v3.3.0-rc1 docs [This commit notification would consist of 2649 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54273 - /dev/spark/v3.3.0-rc1-bin/
Author: maxgekk Date: Thu May 5 08:17:05 2022 New Revision: 54273 Log: Apache Spark v3.3.0-rc1 Added: dev/spark/v3.3.0-rc1-bin/ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc Thu May 5 08:17:05 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmJzh6QTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q07HcEACkCSXRG7LXd0+/jBU49syIUIpOsUrN +bgbq90ifbo6eCidbhj4wJl5OZO7tKCsV2IrbQYRHVP0Lq7GTCw1Fg4/mY4QiLkhi +RWDizZrKrr9CbHXVFo7ZTlIiaxjnTOcIxauKRtu6rbIJdfIzZyRZwhAYerdK6WOx +atrcWfrY/MhKW/v6/25b8R4SWpLssNXaGj5RRqhs/cn/Kjwus8WkBDzQIibcE2ac +TJA+agMH2fkyC1sUaZOVEo1E68nUBV/vv5GyEtctjnESGDsh90/d+6X8L2cmME9H +YGUO91cT1byN3LCR0FDqMSTea8yh3HsdTQ4Ly+s1Ia7h5UCwnDlpFXTyHsHX9sv7 +osXKz4b1ejogjxHlCiPpFgZ+P3gNa31mpJWmOwMLE49Cgxcn7DdZUXTZaAwZmwhH +YURgYtpqrG+4oKpAOLGR+wx+2ZGv0a0QeLd4iTUEhxhiPFRw9QkNG5VUmHgz237b +ZJzz9Ef0wLbaS5F6ZySk0FBqHTPgCsPZS3ZtmdU76zg37mNPej2xotLrLon2TXhN +TJkcLI8azbRoqcrNSOWKjBWYbLJ3nG4bDNqEkqdi/QApiisnneuXX89w152SI8vF +/GoyJK0xs6rjCsUURXWUZ/kzeVQHxtXfBNLk967+TSOHVDaKFehhS0hJbRNUP0jp +O+gTjMZQfQh+Uw== +=saiU +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 Thu May 5 08:17:05 2022 @@ -0,0 +1,3 @@ +SparkR_3.3.0.tar.gz: 98A2665A 04513C1A BE26952E 7396E3B7 AF63715B B6CCFAF3 + CD8C04EC A9F2374F F9E159D3 635CA631 22E4DCEE 1F6B6FE9 + F91F2E18 C9518AAF 713DC95A 3D39D496 Added: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc Thu May 5 08:17:05 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmJzh6YTHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q0+4LD/wMGUzSXVcBCbUsVYtEtmoWjqBDZks7 +wN0SrnaI4UNXKlV0/rRbSMGRnVuqdwAlwJsb2RYNS56wswgTz9bhUB9cUUiSWftp +Pf5XE9LqarekEF48kSYv6XOGCoXIA4wa9BdfzBF8Q43kCI4WTRibv9xaMv+F60or +0xwgLl+8666M0L+Jg2tzrdI+cnkf42j07pL1HfqCsoZJSjxFmgSexXigZj+oSw+p +4bTTofAWUfj+jILpPw8s7Vnf0Gvi7YEGpfchUv9oB8N1LzKLyS1HYNLGSAqbE1vm +CvG9X8IzWQr4wIVqWSMWnsfImJL7EcA+G1SrUZP//d5UitvbF3ZZ5tMUvPYqgfKz +S7kwyxuI1/uQ6CpJ5vxdrQQfRauYA4oWws4jWf2O6xOF5VIB1F0aF0//SLdauR+r +GX4aYzQF+2DG6pIGJWYfrE9I4U4/LQLbdVVawItNnMKjphxD3Vi1kn9ITzJAtpLE +75T9wPvlqSY7bLQlpBLd2+mModF2K+Gonr8Z06Xe0kr/R+tyrjrP5Oa++egLcaFo +ZCr+L6WvkW8XnCfzU7T7d7wNKlskw7sh9BqOluMr+YW9rL+CKEYiM4JZrlUZCT3R +rcLnVX47qigSw+WETHtMLA/TWYS6FQpKqs49cYbWAAT2K6mvmPiM1MupZSo6HgS+ +/KROoSIKLGVTRA
[spark] branch branch-3.3 updated (94d3d6b5fce -> 1fa3171f387)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 94d3d6b5fce [SPARK-38891][SQL] Skipping allocating vector for repetition & definition levels when possible add 1fa3171f387 [SPARK-39060][SQL][3.3] Typo in error messages of decimal overflow No new revisions were added by this update. Summary of changes: .../src/main/scala/org/apache/spark/sql/types/Decimal.scala | 4 ++-- sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out | 2 +- .../sql-tests/results/ansi/decimalArithmeticOperations.sql.out| 8 .../src/test/resources/sql-tests/results/ansi/interval.sql.out| 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54271 - in /dev/spark: v3.3.0-rc1-bin/ v3.3.0-rc1-docs/
Author: maxgekk Date: Thu May 5 05:58:09 2022 New Revision: 54271 Log: Remove v3.3.0-rc1 Removed: dev/spark/v3.3.0-rc1-bin/ dev/spark/v3.3.0-rc1-docs/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54255 - in /dev/spark/v3.3.0-rc1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/R/articles/ _site/api/R/deps/ _site/api/R/deps/bootstrap-5.1.0/ _site/api/R/deps/jquery-3.6.0/ _site/api
Author: maxgekk Date: Wed May 4 19:42:41 2022 New Revision: 54255 Log: Apache Spark v3.3.0-rc1 docs [This commit notification would consist of 2661 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38744][SQL][TESTS] Test the error class: NON_LITERAL_PIVOT_VALUES
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 8f0aca27916 [SPARK-38744][SQL][TESTS] Test the error class: NON_LITERAL_PIVOT_VALUES 8f0aca27916 is described below commit 8f0aca279168fba23695a4919a01b79dc776f21d Author: panbingkun AuthorDate: Wed May 4 21:49:44 2022 +0300 [SPARK-38744][SQL][TESTS] Test the error class: NON_LITERAL_PIVOT_VALUES ## What changes were proposed in this pull request? This PR aims to add a test for the error class NON_LITERAL_PIVOT_VALUES to `QueryCompilationErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36431 from panbingkun/SPARK-38744. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../sql/errors/QueryCompilationErrorsSuite.scala | 20 1 file changed, 20 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 252c7298cb5..40b18ad3cc7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -525,6 +525,26 @@ class QueryCompilationErrorsSuite msg = "Field name m.n is invalid: m is not a struct.; line 1 pos 27") } } + + test("NON_LITERAL_PIVOT_VALUES: literal expressions required for pivot values") { +val df = Seq( + ("dotNET", 2012, 1), + ("Java", 2012, 2), + ("dotNET", 2012, 5000), + ("dotNET", 2013, 48000), + ("Java", 2013, 3) +).toDF("course", "year", "earnings") + +checkErrorClass( + exception = intercept[AnalysisException] { +df.groupBy(df("course")). + pivot(df("year"), Seq($"earnings")). + agg(sum($"earnings")).collect() + }, + errorClass = "NON_LITERAL_PIVOT_VALUES", + msg = "Literal expressions required for pivot values, found 'earnings#\\w+'", + matchMsg = true) + } } class MyCastToString extends SparkUserDefinedFunction( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38733][SQL][TESTS] Test the error class: INCOMPATIBLE_DATASOURCE_REGISTER
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 834841ef5da [SPARK-38733][SQL][TESTS] Test the error class: INCOMPATIBLE_DATASOURCE_REGISTER 834841ef5da is described below commit 834841ef5dab150f249d4171fddb474251beecac Author: panbingkun AuthorDate: Wed May 4 14:59:12 2022 +0300 [SPARK-38733][SQL][TESTS] Test the error class: INCOMPATIBLE_DATASOURCE_REGISTER ## What changes were proposed in this pull request? This PR aims to add a test for the error class INCOMPATIBLE_DATASOURCE_REGISTER to `QueryExecutionErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite*" ``` Closes #36429 from panbingkun/SPARK-38733. 
Lead-authored-by: panbingkun Co-authored-by: Maxim Gekk Signed-off-by: Max Gekk --- .../sql/errors/QueryExecutionErrorsSuite.scala | 33 -- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index baa731571f7..7a5592c148a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -18,13 +18,14 @@ package org.apache.spark.sql.errors import java.io.IOException -import java.util.Locale +import java.net.URL +import java.util.{Locale, ServiceConfigurationError} import org.apache.hadoop.fs.{LocalFileSystem, Path} import org.apache.hadoop.fs.permission.FsPermission import test.org.apache.spark.sql.connector.JavaSimpleWritableDataSource -import org.apache.spark.{SparkArithmeticException, SparkException, SparkIllegalArgumentException, SparkIllegalStateException, SparkRuntimeException, SparkSecurityException, SparkUnsupportedOperationException, SparkUpgradeException} +import org.apache.spark.{SparkArithmeticException, SparkClassNotFoundException, SparkException, SparkIllegalArgumentException, SparkIllegalStateException, SparkRuntimeException, SparkSecurityException, SparkUnsupportedOperationException, SparkUpgradeException} import org.apache.spark.sql.{AnalysisException, DataFrame, QueryTest, SaveMode} import org.apache.spark.sql.catalyst.util.BadRecordException import org.apache.spark.sql.connector.SimpleWritableDataSource @@ -485,6 +486,34 @@ class QueryExecutionErrorsSuite } } } + + test("INCOMPATIBLE_DATASOURCE_REGISTER: create table using an incompatible data source") { +val newClassLoader = new ClassLoader() { + + override def getResources(name: String): java.util.Enumeration[URL] = { +if 
(name.equals("META-INF/services/org.apache.spark.sql.sources.DataSourceRegister")) { + // scalastyle:off + throw new ServiceConfigurationError(s"Illegal configuration-file syntax: $name", +new NoClassDefFoundError("org.apache.spark.sql.sources.HadoopFsRelationProvider")) + // scalastyle:on throwerror +} else { + super.getResources(name) +} + } +} + +Utils.withContextClassLoader(newClassLoader) { + val e = intercept[SparkClassNotFoundException] { +sql("CREATE TABLE student (id INT, name STRING, age INT) USING org.apache.spark.sql.fake") + } + checkErrorClass( +exception = e, +errorClass = "INCOMPATIBLE_DATASOURCE_REGISTER", +msg = "Detected an incompatible DataSourceRegister. Please remove the incompatible library " + + "from classpath or upgrade it. Error: Illegal configuration-file syntax: " + + "META-INF/services/org.apache.spark.sql.sources.DataSourceRegister") +} + } } class FakeFileSystemSetPermission extends LocalFileSystem { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r54250 - /dev/spark/v3.3.0-rc1-bin/
Author: maxgekk Date: Wed May 4 08:35:46 2022 New Revision: 54250 Log: Apache Spark v3.3.0-rc1 Added: dev/spark/v3.3.0-rc1-bin/ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz (with props) dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop2.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3-scala2.13.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-hadoop3.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0-bin-without-hadoop.tgz.sha512 dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz (with props) dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz.asc dev/spark/v3.3.0-rc1-bin/spark-3.3.0.tgz.sha512 Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.asc Wed May 4 08:35:46 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmJyOn4THG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q06HUD/9qNA8U+46FRVU0vZYPE1tA1ydBDQLf +LR2JWFa5/JpV2P8cu1iJFojnbLM1nUk5giDrJaNUeRKG7x5OUMyP3bDpoZQOn14V +U/hP3Lom5Ms/GXFw1h8lQv5Ijaq3/GZXaAQi1Ha1aafb9HTPPBkjy4YMLMHmGaIm +7N3q6eqGxQcVekopxZ73LmXwhWyRK2PEGgrlqbmvbs3CB+VEa/9qm0q8gRplKFQE +cAiCAu80BueS/pn90Tv77QmQDyXBTnlDG0hlrxkTLa4MIkmkbwaEwpRJSKlWIgRo +0emTcHMdwVa1kRICZk5gJ+ceGc/X0pChacv8aY/rP79rc4SprDP+iiQGlL6G38e1 +67h4KgpQFCzKUIidRCXoewbdIrX5VEf2Np+7XddYuMjA8SJML/Cg/2eKU24A+o9b +kOs3Xo/RRrpnGVtcLvWDUCfevATzwN23gmbEtC5L872IhLwd9lbIvaAbqI7LvjMf +VCT0+5rDztAYCjTviMQBPsqg7DjVrjBAUmci5zjXPIiXHCGyTRZDB4tTsm2a3A6R +iIsffD4HvzFJpsqOT1cs0/NrF29mBlc3IDEWWQYZ0Ig+zZo0dJ4ktxwKNF3yJCcz +kLBayDuc0pvzz2VmPs2WB6mwWvtlLxJJmc4nBlJhciOR8pghEnpKuvRqth9H+hWM +Cx9xgDFDm3EK2g== +=QBEZ +-END PGP SIGNATURE- Added: dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 == --- dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 (added) +++ dev/spark/v3.3.0-rc1-bin/SparkR_3.3.0.tar.gz.sha512 Wed May 4 08:35:46 2022 @@ -0,0 +1,3 @@ +SparkR_3.3.0.tar.gz: B1BC497B 9C52A984 346E2BBD 1BD74227 E5DA7830 3735D2FF + 99AE8E55 4543DCB9 F293847F B4781ACB B88369D8 27AF5DD9 + E0DDFEB1 0B9F7B26 21AC2569 3E3E26F2 Added: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc == --- dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc (added) +++ dev/spark/v3.3.0-rc1-bin/pyspark-3.3.0.tar.gz.asc Wed May 4 08:35:46 2022 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- + +iQJHBAABCgAxFiEEgPuOvo66aFBJiXA0kbXcgV2/ENMFAmJyOoATHG1heGdla2tA +YXBhY2hlLm9yZwAKCRCRtdyBXb8Q0wkXD/sGsAKaIECEZja5U2CfmVNHcmT0jQnK +y/oAf+8rtT6vLXZ2EVuHFAdSPa4Rzq0gBnXcQk9gZcDXQhH/Hfu0CkTLZdgs0YUY +jZqKks6oBlNObjEh9G8BNyeRB/Q1efRVupBW1koyex43XHNBPTCwinqR/t6WY+C6 +zdUQ935yhvuPbk3qHAbuLwy/qE5xXNr7YskthIjNoh3md9viLDEeqE4SabPY+o6Z +WoMliOBQD2B1drIzmr97dqCcMh9mtDuy2dMLb5RFk/JD2XTAIk+w4FmFGdZ7CgIA +/gwTEtdst3jqqtPJ5YI2FJAedfY7WIYlBlD1+lxUHxqVcwA6dmYpVQhqsR3DYUKw +JMdd+SyG0IvyBWDe5wo+MG0dBodZVXnv6Ap9dFsF36BLnuN358S+EifbvakbuVD0 +trWLvjO/F1yGkYnTN49OCs8DXz23jPMSjDOZJLH8lvk7bl4lDRg7C8AmAfwLFocG +wTeSS7DxZoEOuQ2WVuclsMm5SCZNrjV6e7AHkyo34I+8Oo7O/tVC7+l8Q+2Qi1Ky +AEkcWsAiRvn3M83tmwP2qPp7FMixdEJqktr9GDhX1B75OIScu87pbmUw3t2fyYiY +S8wuQ0DU9hLLGQaFf6XQuVEr+/0OmIBXPPZoHZTG9o/94Wb0tFyxDfvxJkWrLcOF +l7/5dzJLvxrDkA
[spark] branch master updated: [SPARK-39060][SQL] Typo in error messages of decimal overflow
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 165ce4eb7d6 [SPARK-39060][SQL] Typo in error messages of decimal overflow 165ce4eb7d6 is described below commit 165ce4eb7d6d75201beb1bff879efa99fde24f94 Author: Vitalii Li AuthorDate: Wed May 4 09:41:53 2022 +0300 [SPARK-39060][SQL] Typo in error messages of decimal overflow ### What changes were proposed in this pull request? This PR removes extra curly bracket from debug string for Decimal type in SQL. ### Why are the changes needed? Typo in error messages of decimal overflow. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running updated test: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z decimalArithmeticOperations.sql" ``` Closes #36397 from vli-databricks/SPARK-39060. Authored-by: Vitalii Li Signed-off-by: Max Gekk --- .../src/main/scala/org/apache/spark/sql/types/Decimal.scala | 4 ++-- sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out | 2 +- .../sql-tests/results/ansi/decimalArithmeticOperations.sql.out| 8 .../src/test/resources/sql-tests/results/ansi/interval.sql.out| 2 +- .../apache/spark/sql/errors/QueryExecutionAnsiErrorsSuite.scala | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 12ce7a30601..1eeaa46736e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -227,9 +227,9 @@ final class Decimal extends Ordered[Decimal] with Serializable { def toDebugString: String = { if (decimalVal.ne(null)) { - s"Decimal(expanded,$decimalVal,$precision,$scale})" + 
s"Decimal(expanded, $decimalVal, $precision, $scale)" } else { - s"Decimal(compact,$longVal,$precision,$scale})" + s"Decimal(compact, $longVal, $precision, $scale)" } } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 566e27a0e20..476ec158f1f 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -666,7 +666,7 @@ select cast('123.45' as decimal(4, 2)) struct<> -- !query output org.apache.spark.SparkArithmeticException -[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,123.45,5,2}) cannot be represented as Decimal(4, 2). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 123.45, 5, 2) cannot be represented as Decimal(4, 2). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. == SQL(line 1, position 7) == select cast('123.45' as decimal(4, 2)) ^^^ diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out index 1640875973e..d4b15d92952 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/decimalArithmeticOperations.sql.out @@ -76,7 +76,7 @@ select (5e36BD + 0.1) + 5e36BD struct<> -- !query output org.apache.spark.SparkArithmeticException -[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,10.1,39,1}) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, 10.1, 39, 1) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. 
== SQL(line 1, position 7) == select (5e36BD + 0.1) + 5e36BD ^^^ @@ -88,7 +88,7 @@ select (-4e36BD - 0.1) - 7e36BD struct<> -- !query output org.apache.spark.SparkArithmeticException -[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded,-11.1,39,1}) cannot be represented as Decimal(38, 1). If necessary set "spark.sql.ansi.enabled" to false to bypass this error. +[CANNOT_CHANGE_DECIMAL_PRECISION] Decimal(expanded, -11.1, 39, 1) cannot be represented as Decimal(38, 1).
[spark] branch branch-3.3 updated: [SPARK-39087][SQL][3.3] Improve messages of error classes
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new d3aadb40370 [SPARK-39087][SQL][3.3] Improve messages of error classes d3aadb40370 is described below commit d3aadb40370c0613c2d2ce41d8b905f0fafcd69c Author: Max Gekk AuthorDate: Wed May 4 08:45:03 2022 +0300 [SPARK-39087][SQL][3.3] Improve messages of error classes ### What changes were proposed in this pull request? In the PR, I propose to modify error messages of the following error classes: - INVALID_JSON_SCHEMA_MAP_TYPE - INCOMPARABLE_PIVOT_COLUMN - INVALID_ARRAY_INDEX_IN_ELEMENT_AT - INVALID_ARRAY_INDEX - DIVIDE_BY_ZERO This is a backport of https://github.com/apache/spark/pull/36428. ### Why are the changes needed? To improve readability of error messages. ### Does this PR introduce _any_ user-facing change? Yes. It changes user-facing error messages. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite*" $ build/sbt "sql/testOnly *QueryExecutionAnsiErrorsSuite" $ build/sbt "test:testOnly *SparkThrowableSuite" ``` Authored-by: Max Gekk Signed-off-by: Max Gekk (cherry picked from commit 040526391a45ad610422a48c05aa69ba5133f922) Signed-off-by: Max Gekk Closes #36439 from MaxGekk/error-class-improve-msg-3.3. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 12 - .../org/apache/spark/SparkThrowableSuite.scala | 2 +- .../spark/sql/errors/QueryCompilationErrors.scala | 6 ++--- .../expressions/ArithmeticExpressionSuite.scala| 30 +++--- .../expressions/CollectionExpressionsSuite.scala | 4 +-- .../catalyst/expressions/ComplexTypeSuite.scala| 4 +-- .../expressions/IntervalExpressionsSuite.scala | 10 .../expressions/StringExpressionsSuite.scala | 6 ++--- .../sql/catalyst/util/IntervalUtilsSuite.scala | 2 +- .../resources/sql-tests/results/ansi/array.sql.out | 24 - .../sql-tests/results/ansi/interval.sql.out| 4 +-- .../resources/sql-tests/results/interval.sql.out | 4 +-- .../test/resources/sql-tests/results/pivot.sql.out | 4 +-- .../sql-tests/results/postgreSQL/case.sql.out | 6 ++--- .../sql-tests/results/postgreSQL/int8.sql.out | 6 ++--- .../results/postgreSQL/select_having.sql.out | 2 +- .../results/udf/postgreSQL/udf-case.sql.out| 6 ++--- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- .../sql-tests/results/udf/udf-pivot.sql.out| 4 +-- .../apache/spark/sql/ColumnExpressionSuite.scala | 12 - .../org/apache/spark/sql/DataFrameSuite.scala | 2 +- .../apache/spark/sql/execution/SQLViewSuite.scala | 4 +-- .../sql/streaming/FileStreamSourceSuite.scala | 2 +- 23 files changed, 79 insertions(+), 79 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 463a5eae534..78934667ac0 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -37,7 +37,7 @@ "sqlState" : "22008" }, "DIVIDE_BY_ZERO" : { -"message" : [ "divide by zero. To return NULL instead, use 'try_divide'. If necessary set to false (except for ANSI interval type) to bypass this error." ], +"message" : [ "Division by zero. To return NULL instead, use `try_divide`. 
If necessary set to false (except for ANSI interval type) to bypass this error." ], "sqlState" : "22012" }, "DUPLICATE_KEY" : { @@ -72,7 +72,7 @@ "message" : [ "Grouping sets size cannot be greater than " ] }, "INCOMPARABLE_PIVOT_COLUMN" : { -"message" : [ "Invalid pivot column ''. Pivot columns must be comparable." ], +"message" : [ "Invalid pivot column . Pivot columns must be comparable." ], "sqlState" : "42000" }, "INCOMPATIBLE_DATASOURCE_REGISTER" : { @@ -89,10 +89,10 @@ "message" : [ "" ] }, "INVALID_ARRAY_INDEX" : { -"message" : [ "Invalid index: , numElements: . If necessary set to false to bypass this error." ] +"message" : [ "The index is out of bounds. The array has elements. If necessary set to false to bypass this error." ] }, "INVALID_ARRAY_
[spark] branch branch-3.3 updated (4177626e634 -> 0515536e6d1)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from 4177626e634 [SPARK-35320][SQL][FOLLOWUP] Remove duplicated test add 482b7d54b52 Preparing Spark release v3.3.0-rc1 new 0515536e6d1 Preparing development version 3.3.1-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing development version 3.3.1-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 0515536e6d1b4819eeab59cecb9a045b1a0d3325 Author: Maxim Gekk AuthorDate: Tue May 3 18:15:51 2022 + Preparing development version 3.3.1-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 6 +++--- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 38 files changed, 40 insertions(+), 40 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 9479bb3bf87..0e449e841cf 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.3.0 +Version: 3.3.1 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>. 
Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 2e9c4d9960b..d12f2ad73fa 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a9acfa335e..842d63f5d38 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 7b17e625d75..f7d187bf952 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c5c920e7747..53f38df8851 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 697b5a3928e..845f6659407 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index ad2db11370a..8e159089193 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0 +3.3.1-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 1a7bdee70f3..1987c133285 100644 --- a
[spark] 01/01: Preparing Spark release v3.3.0-rc1
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to tag v3.3.0-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git commit 482b7d54b522c4d1e25f3e84eabbc78126f22a3d Author: Maxim Gekk AuthorDate: Tue May 3 18:15:45 2022 + Preparing Spark release v3.3.0-rc1 --- assembly/pom.xml | 2 +- common/kvstore/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml| 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml| 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/avro/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml| 2 +- external/kafka-0-10-token-provider/pom.xml | 2 +- external/kafka-0-10/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml| 2 +- graphx/pom.xml | 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml| 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml| 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 37 files changed, 38 insertions(+), 38 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 0f88fe4feaf..2e9c4d9960b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 15f7b8fa828..2a9acfa335e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT 
+3.3.0 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index d652b6d1c8d..7b17e625d75 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index db36da4799f..c5c920e7747 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 9e0a202edd1..697b5a3928e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 068ef60b77f..ad2db11370a 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 5081579e38d..1a7bdee70f3 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 500f4083805..66dc93de059 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 -3.3.0-SNAPSHOT +3.3.0 ../../pom.xml diff --git a
[spark] tag v3.3.0-rc1 created (now 482b7d54b52)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to tag v3.3.0-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git at 482b7d54b52 (commit) This tag includes the following new commits: new 482b7d54b52 Preparing Spark release v3.3.0-rc1 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39085][SQL] Move the error message of `INCONSISTENT_BEHAVIOR_CROSS_VERSION` to error-classes.json
1. To improve maintainability of error messages in one place. 2. To follow the general rule that bodies of error messages should be placed in the json file, and only parameters are passed from source code.
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 19 +- .../scala/org/apache/spark/SparkException.scala| 7 --- .../spark/sql/errors/QueryExecutionErrors.scala| 67 ++ .../resources/sql-tests/results/ansi/date.sql.out | 9 ++- .../results/ansi/datetime-parsing-invalid.sql.out | 24 +--- .../sql-tests/results/ansi/timestamp.sql.out | 18 -- .../test/resources/sql-tests/results/date.sql.out | 9 ++- .../results/datetime-formatting-invalid.sql.out| 66 ++--- .../results/datetime-parsing-invalid.sql.out | 24 +--- .../sql-tests/results/json-functions.sql.out | 6 +- .../resources/sql-tests/results/timestamp.sql.out | 18 -- .../results/timestampNTZ/timestamp-ansi.sql.out| 3 +- .../results/timestampNTZ/timestamp.sql.out | 3 +- .../native/stringCastAndExpressions.sql.out| 9 ++- .../sql/errors/QueryExecutionErrorsSuite.scala | 4 +- 15 files changed, 177 insertions(+), 109 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index eacbeec570f..24b50c4209a 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -79,7 +79,24 @@ "message" : [ "Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. Error: " ] }, "INCONSISTENT_BEHAVIOR_CROSS_VERSION" : { -"message" : [ "You may get a different result due to the upgrading to Spark >= : " ] +"message" : [ "You may get a different result due to the upgrading to" ], +"subClass" : { + "DATETIME_PATTERN_RECOGNITION" : { +"message" : [ " Spark >= 3.0: \nFail to recognize pattern in the DateTimeFormatter. 1) You can set to 'LEGACY' to restore the behavior before Spark 3.0. 
2) You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html"; ] + }, + "FORMAT_DATETIME_BY_NEW_PARSER" : { +"message" : [ " Spark >= 3.0: \nFail to format it to in the new formatter. You can set\n to 'LEGACY' to restore the behavior before\nSpark 3.0, or set to 'CORRECTED' and treat it as an invalid datetime string.\n" ] + }, + "PARSE_DATETIME_BY_NEW_PARSER" : { +"message" : [ " Spark >= 3.0: \nFail to parse in the new parser. You can set to 'LEGACY' to restore the behavior before Spark 3.0, or set to 'CORRECTED' and treat it as an invalid datetime string." ] + }, + "READ_ANCIENT_DATETIME" : { +"message" : [ " Spark >= 3.0: \nreading dates before 1582-10-15 or timestamps before 1900-01-01T00:00:00Z\nfrom files can be ambiguous, as the
[spark] branch master updated: [SPARK-39087][SQL] Improve messages of error classes
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 040526391a4 [SPARK-39087][SQL] Improve messages of error classes 040526391a4 is described below commit 040526391a45ad610422a48c05aa69ba5133f922 Author: Max Gekk AuthorDate: Tue May 3 08:17:02 2022 +0300 [SPARK-39087][SQL] Improve messages of error classes ### What changes were proposed in this pull request? In the PR, I propose to modify error messages of the following error classes: - INVALID_JSON_SCHEMA_MAP_TYPE - INCOMPARABLE_PIVOT_COLUMN - INVALID_ARRAY_INDEX_IN_ELEMENT_AT - INVALID_ARRAY_INDEX - DIVIDE_BY_ZERO ### Why are the changes needed? To improve readability of error messages. ### Does this PR introduce _any_ user-facing change? Yes. It changes user-facing error messages. ### How was this patch tested? By running the modified test suites: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" $ build/sbt "sql/testOnly *QueryExecutionErrorsSuite*" $ build/sbt "sql/testOnly *QueryExecutionAnsiErrorsSuite" $ build/sbt "test:testOnly *SparkThrowableSuite" ``` Closes #36428 from MaxGekk/error-class-improve-msg. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 12 - .../org/apache/spark/SparkThrowableSuite.scala | 4 +-- .../spark/sql/errors/QueryCompilationErrors.scala | 6 ++--- .../expressions/ArithmeticExpressionSuite.scala| 30 +++--- .../expressions/CollectionExpressionsSuite.scala | 4 +-- .../catalyst/expressions/ComplexTypeSuite.scala| 4 +-- .../expressions/IntervalExpressionsSuite.scala | 10 .../expressions/StringExpressionsSuite.scala | 6 ++--- .../sql/catalyst/util/IntervalUtilsSuite.scala | 2 +- .../resources/sql-tests/results/ansi/array.sql.out | 24 - .../sql-tests/results/ansi/interval.sql.out| 4 +-- .../resources/sql-tests/results/interval.sql.out | 4 +-- .../test/resources/sql-tests/results/pivot.sql.out | 4 +-- .../sql-tests/results/postgreSQL/case.sql.out | 6 ++--- .../sql-tests/results/postgreSQL/int8.sql.out | 6 ++--- .../results/postgreSQL/select_having.sql.out | 2 +- .../results/udf/postgreSQL/udf-case.sql.out| 6 ++--- .../udf/postgreSQL/udf-select_having.sql.out | 2 +- .../sql-tests/results/udf/udf-pivot.sql.out| 4 +-- .../apache/spark/sql/ColumnExpressionSuite.scala | 12 - .../org/apache/spark/sql/DataFrameSuite.scala | 2 +- .../sql/errors/QueryCompilationErrorsSuite.scala | 10 +++- .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 8 +++--- .../sql/errors/QueryExecutionErrorsSuite.scala | 25 +- .../apache/spark/sql/execution/SQLViewSuite.scala | 4 +-- .../sql/streaming/FileStreamSourceSuite.scala | 2 +- 26 files changed, 101 insertions(+), 102 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index aa38f8b9747..eacbeec570f 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -34,7 +34,7 @@ "sqlState" : "22008" }, "DIVIDE_BY_ZERO" : { -"message" : [ "divide by zero. To return NULL instead, use 'try_divide'. 
If necessary set to false (except for ANSI interval type) to bypass this error." ], +"message" : [ "Division by zero. To return NULL instead, use `try_divide`. If necessary set to false (except for ANSI interval type) to bypass this error." ], "sqlState" : "22012" }, "DUPLICATE_KEY" : { @@ -72,7 +72,7 @@ "message" : [ "Grouping sets size cannot be greater than " ] }, "INCOMPARABLE_PIVOT_COLUMN" : { -"message" : [ "Invalid pivot column ''. Pivot columns must be comparable." ], +"message" : [ "Invalid pivot column . Pivot columns must be comparable." ], "sqlState" : "42000" }, "INCOMPATIBLE_DATASOURCE_REGISTER" : { @@ -89,10 +89,10 @@ "message" : [ "" ] }, "INVALID_ARRAY_INDEX" : { -"message" : [ "Invalid index: , numElements: . If necessary set to false to bypass this error." ] +"message" : [ "The index is out of bounds. The array has elements. If necessary set to false to bypass this error." ] }, "INVALID_ARRAY_INDEX_IN_ELEMENT_AT" : { -"mes
[spark] branch master updated (81786a2e960 -> 501519e5a52)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 81786a2e960 [SPARK-38737][SQL][TESTS] Test the error classes: INVALID_FIELD_NAME add 501519e5a52 [SPARK-38729][SQL][TESTS] Test the error class: FAILED_SET_ORIGINAL_PERMISSION_BACK No new revisions were added by this update. Summary of changes: .../spark/sql/errors/QueryCompilationErrors.scala | 2 +- .../sql/errors/QueryExecutionErrorsSuite.scala | 34 +- 2 files changed, 34 insertions(+), 2 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38737][SQL][TESTS] Test the error classes: INVALID_FIELD_NAME
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 81786a2e960 [SPARK-38737][SQL][TESTS] Test the error classes: INVALID_FIELD_NAME 81786a2e960 is described below commit 81786a2e96018ded474b353c004ac2f63fde Author: panbingkun AuthorDate: Sun May 1 11:35:09 2022 +0300 [SPARK-38737][SQL][TESTS] Test the error classes: INVALID_FIELD_NAME ## What changes were proposed in this pull request? This PR aims to add a test for the error class INVALID_FIELD_NAME to `QueryCompilationErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36404 from panbingkun/SPARK-38737. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../spark/sql/errors/QueryCompilationErrorsSuite.scala | 14 ++ 1 file changed, 14 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 1115db07f21..8fffccbed40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -513,6 +513,20 @@ class QueryCompilationErrorsSuite msg = "Invalid pivot value 'struct(col1, dotnet, col2, Experts)': value data type " + "struct does not match pivot column data type int") } + + test("INVALID_FIELD_NAME: add a nested field for not struct parent") { +withTable("t") { + sql("CREATE TABLE t(c struct, m string) USING parquet") + + val e = intercept[AnalysisException] { +sql("ALTER TABLE t ADD COLUMNS (m.n int)") + } + checkErrorClass( +exception = e, +errorClass = "INVALID_FIELD_NAME", +msg = "Field name m.n is invalid: m is not a struct.; line 1 pos 27") +} + } } class MyCastToString extends SparkUserDefinedFunction( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-38700][SQL] Use error classes in the execution errors of save mode
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new b30d1d41414 [SPARK-38700][SQL] Use error classes in the execution errors of save mode b30d1d41414 is described below commit b30d1d41414e200f1cc7ec9675e5c013bdf5b214 Author: panbingkun AuthorDate: Sun May 1 10:34:31 2022 +0300 [SPARK-38700][SQL] Use error classes in the execution errors of save mode ### What changes were proposed in this pull request? Migrate the following errors in QueryExecutionErrors: * unsupportedSaveModeError -> UNSUPPORTED_SAVE_MODE ### Why are the changes needed? Porting execution errors of unsupported saveMode to new error framework. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Add new UT. Closes #36350 from panbingkun/SPARK-38700. Authored-by: panbingkun Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 11 .../main/scala/org/apache/spark/ErrorInfo.scala| 6 ++--- .../spark/sql/errors/QueryExecutionErrors.scala| 9 +-- .../InsertIntoHadoopFsRelationCommand.scala| 2 +- .../sql/errors/QueryExecutionErrorsSuite.scala | 31 -- 5 files changed, 51 insertions(+), 8 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 4908a9b6c2e..aa38f8b9747 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -246,6 +246,17 @@ "UNSUPPORTED_GROUPING_EXPRESSION" : { "message" : [ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup" ] }, + "UNSUPPORTED_SAVE_MODE" : { +"message" : [ "The save mode is not supported for: " ], +"subClass" : { + "EXISTENT_PATH" : { +"message" : [ "an existent path." ] + }, + "NON_EXISTENT_PATH" : { +"message" : [ "a not existent path." 
] + } +} + }, "UNTYPED_SCALA_UDF" : { "message" : [ "You're using untyped Scala UDF, which does not have the input type information. Spark may blindly pass null to the Scala closure with primitive-type argument, and the closure will see the default value of the Java type for the null argument, e.g. `udf((x: Int) => x, IntegerType)`, the result is 0 for null input. To get rid of this error, you could:\n1. use typed Scala UDF APIs(without return type parameter), e.g. `udf((x: Int) => x)`\n2. use Java UDF APIs, e.g. `udf(ne [...] }, diff --git a/core/src/main/scala/org/apache/spark/ErrorInfo.scala b/core/src/main/scala/org/apache/spark/ErrorInfo.scala index a21f33e8833..0447572bb1c 100644 --- a/core/src/main/scala/org/apache/spark/ErrorInfo.scala +++ b/core/src/main/scala/org/apache/spark/ErrorInfo.scala @@ -80,9 +80,9 @@ private[spark] object SparkThrowableHelper { val errorSubInfo = subClass.getOrElse(subErrorClass, throw new IllegalArgumentException(s"Cannot find sub error class '$subErrorClass'")) val subMessageParameters = messageParameters.tail - "[" + errorClass + "." + subErrorClass + "] " + errorInfo.messageFormat + - String.format(errorSubInfo.messageFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), - subMessageParameters: _*) + "[" + errorClass + "." 
+ subErrorClass + "] " + String.format((errorInfo.messageFormat + +errorSubInfo.messageFormat).replaceAll("<[a-zA-Z0-9_-]+>", "%s"), +subMessageParameters: _*) } else { "[" + errorClass + "] " + String.format( errorInfo.messageFormat.replaceAll("<[a-zA-Z0-9_-]+>", "%s"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 225315d3f02..4b8d76e8e6f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -592,8 +592,13 @@ object QueryExecutionErrors extends QueryErrorsBase { """.stripMargin) } - def unsupportedSaveModeError(saveMode: String, pathExists: Boolean): Throwable = { -new IllegalStateException(s"unsupported save mode $saveMode ($pathExists)") + def saveModeUnsupportedError(saveMode: Any, pathExists: Boolean): Throwable = { +pathEx
[spark] branch master updated: [SPARK-38748][SQL][TESTS] Test the error class: PIVOT_VALUE_DATA_TYPE_MISMATCH
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 30a2d9bd3a0 [SPARK-38748][SQL][TESTS] Test the error class: PIVOT_VALUE_DATA_TYPE_MISMATCH 30a2d9bd3a0 is described below commit 30a2d9bd3a0fbf19d6862f9a0904457fac16ff5d Author: panbingkun AuthorDate: Fri Apr 29 09:29:35 2022 +0300 [SPARK-38748][SQL][TESTS] Test the error class: PIVOT_VALUE_DATA_TYPE_MISMATCH ## What changes were proposed in this pull request? This PR aims to add a test for the error class PIVOT_VALUE_DATA_TYPE_MISMATCH to `QueryCompilationErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36400 from panbingkun/SPARK-38748. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../sql/errors/QueryCompilationErrorsSuite.scala | 24 +- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index ec8edd2acd6..1115db07f21 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.errors import org.apache.spark.sql.{AnalysisException, IntegratedUDFTestUtils, QueryTest, Row} import org.apache.spark.sql.api.java.{UDF1, UDF2, UDF23Test} import org.apache.spark.sql.expressions.SparkUserDefinedFunction -import org.apache.spark.sql.functions.{grouping, grouping_id, sum, udf} +import org.apache.spark.sql.functions.{grouping, grouping_id, lit, struct, sum, udf} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{IntegerType, MapType, StringType, StructField, StructType} @@ -491,6 +491,28 @@ class QueryCompilationErrorsSuite msg = "Field name c.X is ambiguous and has 2 matching fields in the struct.; line 1 pos 0") } } + + test("PIVOT_VALUE_DATA_TYPE_MISMATCH: can't cast pivot value data type (struct) " + +"to pivot column data type (int)") { +val df = Seq( + ("dotNET", 2012, 1), + ("Java", 2012, 2), + ("dotNET", 2012, 5000), + ("dotNET", 2013, 48000), + ("Java", 2013, 3) +).toDF("course", "year", "earnings") + +checkErrorClass( + exception = intercept[AnalysisException] { +df.groupBy(df("course")).pivot(df("year"), Seq( + struct(lit("dotnet"), lit("Experts")), + struct(lit("java"), lit("Dummies". 
+ agg(sum($"earnings")).collect() + }, + errorClass = "PIVOT_VALUE_DATA_TYPE_MISMATCH", + msg = "Invalid pivot value 'struct(col1, dotnet, col2, Experts)': value data type " + +"struct does not match pivot column data type int") + } } class MyCastToString extends SparkUserDefinedFunction( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39050][SQL] Error class: UNSUPPORTED_OPERATION to UNSUPPORTED_FEATURE
This affects three errors: ARROW TIMESTAMP, ORC TIMESTAMP TO TIMESTAMP_NTZ and ORC TIMESTAMP_NTZ TO TIMESTAMP
] + }, "PANDAS_UDAF_IN_PIVOT" : { "message" : [ "Pandas user defined aggregate function in the PIVOT clause." ] }, @@ -243,9 +246,6 @@ "UNSUPPORTED_GROUPING_EXPRESSION" : { "message" : [ "grouping()/grouping_id() can only be used with GroupingSets/Cube/Rollup" ] }, - "UNSUPPORTED_OPERATION" : { -"message" : [ "The operation is not supported: " ] - }, "UNTYPED_SCALA_UDF" : { "message" : [ "You're using untyped Scala UDF, which does not have the input type information. Spark may blindly pass null to the Scala closure with primitive-type argument, and the closure will see the default value of the Java type for the null argument, e.g. `udf((x: Int) => x, IntegerType)`, the result is 0 for null input. To get rid of this error, you could:\n1. use typed Scala UDF APIs(without return type parameter), e.g. `udf((x: Int) => x)`\n2. use Java UDF APIs, e.g. `udf(ne [...] }, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index d5e42a1dde7..225315d3f02 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -68,6 +68,11 @@ import org.apache.spark.util.CircularBuffer */ object QueryExecutionErrors extends QueryErrorsBase { + def internalMissingTimezoneIdError(): Throwable = { +new SparkIllegalStateException(errorClass = "INTERNAL_ERROR", + messageParameters = Array("Missing timezoneId where it is mandatory.")) + } + def logicalHintOperatorNotRemovedDuringAnalysisError(): Throwable = { new SparkIllegalStateException(errorClass = "INTERNAL_ERROR", messageParameters = Array( @@ -1614,15 +1619,6 @@ object QueryExecutionErrors extends QueryErrorsBase { new SparkException(s"Can not load in UserDefinedType ${name} for user class ${userClass}.") } - def timeZoneIdNotSpecifiedForTimestampTypeError(): Throwable = { -new 
SparkUnsupportedOperationException( - errorClass = "UNSUPPORTED_OPERATION", - messageParameters = Array( -s"${toSQLType(TimestampType)} must supply timeZoneId parameter " + - s"while converting to the arrow timestamp type.") -) - } - def notPublicClassError(name: String): Throwable = { new UnsupportedOperationException( s"$name is not a public class. Only public classes are supported.") @@ -1936,18 +1932
[spark] branch master updated: [SPARK-38718][SQL][TESTS] Test the error class: AMBIGUOUS_FIELD_NAME
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 581c801e02f [SPARK-38718][SQL][TESTS] Test the error class: AMBIGUOUS_FIELD_NAME 581c801e02f is described below commit 581c801e02f97712545399f37ce6e7acac7af5b5 Author: panbingkun AuthorDate: Thu Apr 28 23:29:17 2022 +0300 [SPARK-38718][SQL][TESTS] Test the error class: AMBIGUOUS_FIELD_NAME ## What changes were proposed in this pull request? This PR aims to add a test for the error class AMBIGUOUS_FIELD_NAME to `QueryCompilationErrorsSuite`. ### Why are the changes needed? The changes improve test coverage, and document expected error messages in tests. ### Does this PR introduce any user-facing change? No ### How was this patch tested? By running new test: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36395 from panbingkun/SPARK-38718. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../spark/sql/errors/QueryCompilationErrorsSuite.scala| 15 +++ 1 file changed, 15 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 2d1e6f94925..ec8edd2acd6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -476,6 +476,21 @@ class QueryCompilationErrorsSuite checkAnswer(sql("SELECT __auto_generated_subquery_name.i from (SELECT i FROM v)"), Row(1)) } } + + test("AMBIGUOUS_FIELD_NAME: alter column matching multi fields in the struct") { +withTable("t") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { +sql("CREATE TABLE t(c struct) USING parquet") + } + + checkErrorClass( +exception = intercept[AnalysisException] { + sql("ALTER TABLE t CHANGE COLUMN c.X COMMENT 'new comment'") +}, +errorClass = "AMBIGUOUS_FIELD_NAME", +msg = "Field name c.X is ambiguous and has 2 matching fields in the struct.; line 1 pos 0") +} + } } class MyCastToString extends SparkUserDefinedFunction( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (ec2bfa566ed -> ecade78526b)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from ec2bfa566ed [SPARK-39055][DOC] Fix documentation 404 page add ecade78526b [SPARK-38741][SQL][TESTS] Test the error class: MAP_KEY_DOES_NOT_EXIST No new revisions were added by this update. Summary of changes: .../sql/errors/QueryExecutionAnsiErrorsSuite.scala| 19 ++- 1 file changed, 18 insertions(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39052][SQL] Support Literal.create(Char, StringType)
To make the support of external type `Char` same as `Literal.apply`.
Authored-by: Hyukjin Kwon Signed-off-by: Max Gekk --- .../scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala | 1 + .../org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala | 1 + .../spark/sql/catalyst/expressions/LiteralExpressionSuite.scala | 4 3 files changed, 6 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala index 3e6d31e79b7..263d3734217 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystTypeConverters.scala @@ -499,6 +499,7 @@ object CatalystTypeConverters { */ def convertToCatalyst(a: Any): Any = a match { case s: String => StringConverter.toCatalyst(s) +case c: Char => StringConverter.toCatalyst(c.toString) case d: Date => DateConverter.toCatalyst(d) case ld: LocalDate => LocalDateConverter.toCatalyst(ld) case t: Timestamp => TimestampConverter.toCatalyst(t) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala index b559e219882..bf194a2288b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/CatalystTypeConvertersSuite.scala @@ -152,6 +152,7 @@ class CatalystTypeConvertersSuite extends SparkFunSuite with SQLHelper { val converter = CatalystTypeConverters.createToCatalystConverter(StringType) val expected = UTF8String.fromString("X") assert(converter(chr) === expected) +assert(CatalystTypeConverters.convertToCatalyst('a') === UTF8String.fromString("a")) } test("SPARK-33390: Make Literal support char array") { diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index 6ce51f1eec8..80e7a3206aa 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -247,6 +247,10 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { // scalastyle:on } + test("SPARK-39052: Support Char in Literal.create") { +checkEvaluation(Literal.create('a', StringType), "a") + } + test("construct literals from java.time.LocalDate") { Seq( LocalDate.of(1, 1, 1), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39047][SQL] Replace the error class ILLEGAL_SUBSTRING by INVALID_PARAMETER_VALUE
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 9dcc24c36f6 [SPARK-39047][SQL] Replace the error class ILLEGAL_SUBSTRING by INVALID_PARAMETER_VALUE 9dcc24c36f6 is described below commit 9dcc24c36f6fcdf43bf66fe50415be575f7b2918 Author: Max Gekk AuthorDate: Thu Apr 28 07:46:44 2022 +0300 [SPARK-39047][SQL] Replace the error class ILLEGAL_SUBSTRING by INVALID_PARAMETER_VALUE ### What changes were proposed in this pull request? In the PR, I propose to remove the `ILLEGAL_SUBSTRING` error class, and use `INVALID_PARAMETER_VALUE` in the case when the `strfmt` parameter of the `format_string()` function contains `%0$`. The last value is handled differently by JDKs: _"... Java 8 and Java 11 uses it as "%1$", and Java 17 throws IllegalFormatArgumentIndexException(Illegal format argument index = 0)"_. ### Why are the changes needed? To improve code maintenance and user experience with Spark SQL by reducing the number of user-facing error classes. ### Does this PR introduce _any_ user-facing change? Yes, it changes user-facing error message. Before: ```sql spark-sql> select format_string('%0$s', 'Hello'); Error in query: [ILLEGAL_SUBSTRING] The argument_index of string format cannot contain position 0$.; line 1 pos 7 ``` After: ```sql spark-sql> select format_string('%0$s', 'Hello'); Error in query: [INVALID_PARAMETER_VALUE] The value of parameter(s) 'strfmt' in `format_string` is invalid: expects %1$, %2$ and so on, but got %0$.; line 1 pos 7 ``` ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *SparkThrowableSuite" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z text.sql" $ build/sbt "test:testOnly *QueryCompilationErrorsSuite" ``` Closes #36380 from MaxGekk/error-class-ILLEGAL_SUBSTRING. 
Authored-by: Max Gekk Signed-off-by: Max Gekk --- core/src/main/resources/error/error-classes.json | 3 --- .../apache/spark/sql/catalyst/expressions/stringExpressions.scala | 3 +-- .../scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala | 7 --- .../src/test/resources/sql-tests/results/postgreSQL/text.sql.out | 2 +- .../org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala | 7 --- 5 files changed, 10 insertions(+), 12 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 673866e6c35..4738599685b 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -71,9 +71,6 @@ "GROUPING_SIZE_LIMIT_EXCEEDED" : { "message" : [ "Grouping sets size cannot be greater than " ] }, - "ILLEGAL_SUBSTRING" : { -"message" : [ " cannot contain ." ] - }, "INCOMPARABLE_PIVOT_COLUMN" : { "message" : [ "Invalid pivot column ''. Pivot columns must be comparable." 
], "sqlState" : "42000" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 976caeb3502..9089ff46637 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -1898,8 +1898,7 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC */ private def checkArgumentIndexNotZero(expression: Expression): Unit = expression match { case StringLiteral(pattern) if pattern.contains("%0$") => - throw QueryCompilationErrors.illegalSubstringError( -"The argument_index of string format", "position 0$") + throw QueryCompilationErrors.zeroArgumentIndexError() case _ => // do nothing } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 7f212ed5891..3d379fb4f71 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -66,10 +66,11 @@ object QueryCompilationErrors extends QueryErrorsBase { messageParameters = Array(sizeLimit.toString))
[spark] branch branch-3.3 updated (b3ecff34ab6 -> b25276f4385)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git from b3ecff34ab6 [SPARK-34079][SQL][FOLLOW-UP] Revert some changes in InjectRuntimeFilterSuite add b25276f4385 [SPARK-39015][SQL][3.3] Remove the usage of toSQLValue(v) without an explicit type No new revisions were added by this update. Summary of changes: .../spark/sql/catalyst/expressions/Cast.scala | 58 -- .../expressions/complexTypeExtractors.scala| 5 +- .../spark/sql/catalyst/util/DateTimeUtils.scala| 14 -- .../spark/sql/catalyst/util/IntervalUtils.scala| 23 + .../apache/spark/sql/errors/QueryErrorsBase.scala | 14 ++ .../spark/sql/errors/QueryExecutionErrors.scala| 47 ++ .../scala/org/apache/spark/sql/types/Decimal.scala | 21 +--- .../org/apache/spark/sql/types/numerics.scala | 13 +++-- .../catalyst/expressions/AnsiCastSuiteBase.scala | 3 +- .../test/resources/sql-tests/inputs/ansi/map.sql | 1 + .../resources/sql-tests/results/ansi/map.sql.out | 14 +- 11 files changed, 125 insertions(+), 88 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (e49147af4a8 -> 4e84f339973)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from e49147af4a8 [SPARK-39015][SQL] Remove the usage of toSQLValue(v) without an explicit type add 4e84f339973 [SPARK-39027][SQL] Output SQL statements in error messages in upper case and w/o double quotes No new revisions were added by this update. Summary of changes: python/pyspark/sql/tests/test_udf.py | 2 +- .../apache/spark/sql/errors/QueryErrorsBase.scala| 3 +-- .../ExtractPythonUDFFromJoinConditionSuite.scala | 2 +- .../resources/sql-tests/results/describe.sql.out | 4 ++-- .../sql/errors/QueryCompilationErrorsSuite.scala | 6 +++--- .../spark/sql/errors/QueryParsingErrorsSuite.scala | 20 ++-- .../spark/sql/execution/command/DDLParserSuite.scala | 4 ++-- 7 files changed, 20 insertions(+), 21 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (d05e01d5402 -> e49147af4a8)
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from d05e01d5402 [SPARK-34079][SQL][FOLLOW-UP] Revert some changes in InjectRuntimeFilterSuite add e49147af4a8 [SPARK-39015][SQL] Remove the usage of toSQLValue(v) without an explicit type No new revisions were added by this update. Summary of changes: .../spark/sql/catalyst/expressions/Cast.scala | 58 -- .../expressions/complexTypeExtractors.scala| 5 +- .../spark/sql/catalyst/util/DateTimeUtils.scala| 14 -- .../spark/sql/catalyst/util/IntervalUtils.scala| 23 + .../apache/spark/sql/errors/QueryErrorsBase.scala | 14 ++ .../spark/sql/errors/QueryExecutionErrors.scala| 47 ++ .../scala/org/apache/spark/sql/types/Decimal.scala | 21 +--- .../org/apache/spark/sql/types/numerics.scala | 13 +++-- .../catalyst/expressions/AnsiCastSuiteBase.scala | 3 +- .../test/resources/sql-tests/inputs/ansi/map.sql | 1 + .../resources/sql-tests/results/ansi/map.sql.out | 14 +- .../sql/errors/QueryExecutionAnsiErrorsSuite.scala | 5 +- 12 files changed, 128 insertions(+), 90 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39028][SQL] Use SparkDateTimeException when casting to datetime types failed
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new ead45889278 [SPARK-39028][SQL] Use SparkDateTimeException when casting to datetime types failed ead45889278 is described below commit ead45889278e8c5f71dc2ff2c7b020592e5e897f Author: Gengliang Wang AuthorDate: Tue Apr 26 22:06:07 2022 +0300 [SPARK-39028][SQL] Use SparkDateTimeException when casting to datetime types failed ### What changes were proposed in this pull request? Use SparkDateTimeException when casting to datetime types failed ### Why are the changes needed? It is more reasonable to throw `SparkDateTimeException` instead of `java.time.DateTimeException` ### Does this PR introduce _any_ user-facing change? Yes, a minor change for the exception type. ### How was this patch tested? UT Closes #36362 from gengliangwang/datetimeException. Authored-by: Gengliang Wang Signed-off-by: Max Gekk --- .../spark/sql/errors/QueryExecutionErrors.scala | 5 ++--- .../resources/sql-tests/results/ansi/cast.sql.out| 20 ++-- .../resources/sql-tests/results/ansi/date.sql.out| 4 ++-- .../results/ansi/datetime-parsing-invalid.sql.out| 8 .../sql-tests/results/ansi/interval.sql.out | 16 .../results/postgreSQL/window_part3.sql.out | 2 +- .../results/timestampNTZ/timestamp-ansi.sql.out | 4 ++-- 7 files changed, 29 insertions(+), 30 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 59172682925..dd45f62ac09 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1019,9 +1019,8 @@ object QueryExecutionErrors extends QueryErrorsBase { } else { toSQLValue(value) } -new 
DateTimeException(s"Invalid input syntax for type ${toSQLType(to)}: $valueString. " + - s"To return NULL instead, use 'try_cast'. If necessary set ${SQLConf.ANSI_ENABLED.key} " + - s"to false to bypass this error." + errorContext) +new SparkDateTimeException("INVALID_SYNTAX_FOR_CAST", + Array(toSQLType(to), valueString, SQLConf.ANSI_ENABLED.key, errorContext)) } def registeringStreamingQueryListenerError(e: Exception): Throwable = { diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index 96db4f2db42..566e27a0e20 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -697,8 +697,8 @@ select cast('a' as date) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "DATE": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +[INVALID_SYNTAX_FOR_CAST] Invalid input syntax for type "DATE": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. == SQL(line 1, position 7) == select cast('a' as date) ^ @@ -717,8 +717,8 @@ select cast('a' as timestamp) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +[INVALID_SYNTAX_FOR_CAST] Invalid input syntax for type "TIMESTAMP": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. 
== SQL(line 1, position 7) == select cast('a' as timestamp) ^^ @@ -737,8 +737,8 @@ select cast('a' as timestamp_ntz) -- !query schema struct<> -- !query output -java.time.DateTimeException -Invalid input syntax for type "TIMESTAMP_NTZ": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql.ansi.enabled to false to bypass this error. +org.apache.spark.SparkDateTimeException +[INVALID_SYNTAX_FOR_CAST] Invalid input syntax for type "TIMESTAMP_NTZ": 'a'. To return NULL instead, use 'try_cast'. If necessary set spark.sql
[spark] branch master updated: [SPARK-38742][SQL][TESTS] Move the tests `MISSING_COLUMN` from SQLQuerySuite to QueryCompilationErrorsSuite
This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new da51dc7aa76 [SPARK-38742][SQL][TESTS] Move the tests `MISSING_COLUMN` from SQLQuerySuite to QueryCompilationErrorsSuite da51dc7aa76 is described below commit da51dc7aa7674f158fb82f9f735af7d46f6a9399 Author: panbingkun AuthorDate: Mon Apr 25 21:53:17 2022 +0300 [SPARK-38742][SQL][TESTS] Move the tests `MISSING_COLUMN` from SQLQuerySuite to QueryCompilationErrorsSuite ### What changes were proposed in this pull request? This pr aims to move tests for the error class MISSING_COLUMN from SQLQuerySuite to QueryCompilationErrorsSuite, it's a followup of SPARK-37935. ### Why are the changes needed? To improve code maintenance. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? By running the moved tests: ``` $ build/sbt "sql/testOnly *QueryCompilationErrorsSuite*" ``` Closes #36280 from panbingkun/SPARK-38742. 
Authored-by: panbingkun Signed-off-by: Max Gekk --- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 38 - .../sql/errors/QueryCompilationErrorsSuite.scala | 63 ++ 2 files changed, 63 insertions(+), 38 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 70b38db034f..4d384d3286b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1114,31 +1114,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark ) } - test("SPARK-17863: SELECT distinct does not work correctly if order by missing attribute") { -checkAnswer( - sql("""select distinct struct.a, struct.b - |from ( - | select named_struct('a', 1, 'b', 2, 'c', 3) as struct - | union all - | select named_struct('a', 1, 'b', 2, 'c', 4) as struct) tmp - |order by a, b - |""".stripMargin), - Row(1, 2) :: Nil) - -val error = intercept[AnalysisException] { - sql("""select distinct struct.a, struct.b -|from ( -| select named_struct('a', 1, 'b', 2, 'c', 3) as struct -| union all -| select named_struct('a', 1, 'b', 2, 'c', 4) as struct) tmp -|order by struct.a, struct.b -|""".stripMargin) -} -assert(error.getErrorClass == "MISSING_COLUMN") -assert(error.messageParameters.sameElements(Array("struct.a", "a, b"))) - - } - test("cast boolean to string") { // TODO Ensure true/false string letter casing is consistent with Hive in all cases. 
checkAnswer( @@ -2734,19 +2709,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } - test("SPARK-21335: support un-aliased subquery") { -withTempView("v") { - Seq(1 -> "a").toDF("i", "j").createOrReplaceTempView("v") - checkAnswer(sql("SELECT i from (SELECT i FROM v)"), Row(1)) - - val e = intercept[AnalysisException](sql("SELECT v.i from (SELECT i FROM v)")) - assert(e.getErrorClass == "MISSING_COLUMN") - assert(e.messageParameters.sameElements(Array("v.i", "__auto_generated_subquery_name.i"))) - - checkAnswer(sql("SELECT __auto_generated_subquery_name.i from (SELECT i FROM v)"), Row(1)) -} - } - test("SPARK-21743: top-most limit should not cause memory leak") { // In unit test, Spark will fail the query if memory leak detected. spark.range(100).groupBy("id").count().limit(1).collect() diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 8b63ba52ab8..f1325a68366 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -409,6 +409,69 @@ class QueryCompilationErrorsSuite "can only contain StringType as a key type for a MapType." ) } + + test("MISSING_COLUMN: SELECT distinct does not work correctly " + +"if order by missing attribute") { +checkAnswer( + sql( +&quo