This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 49562f41678 [SPARK-39167][SQL] Throw an exception w/ an error class
for multiple rows from a subquery used as an expression
49562f41678 is described below
commit 49562f416788cab05b3f82a2471a1f2f6561a1d8
Author: panbingkun <[email protected]>
AuthorDate: Sat May 21 07:50:59 2022 +0300
[SPARK-39167][SQL] Throw an exception w/ an error class for multiple rows
from a subquery used as an expression
### What changes were proposed in this pull request?
In the PR, I propose to use the MULTI_VALUE_SUBQUERY_ERROR error class
for multiple rows from a subquery used as an expression.
### Why are the changes needed?
Porting the execution error for multiple rows from a subquery used as an
expression to the new error framework should improve the user experience with
Spark SQL.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Added a new test to QueryExecutionErrorsSuite.
Closes #36580 from panbingkun/SPARK-39167.
Authored-by: panbingkun <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
core/src/main/resources/error/error-classes.json | 3 +++
.../spark/sql/errors/QueryExecutionErrors.scala | 5 ++++
.../org/apache/spark/sql/execution/subquery.scala | 5 ++--
.../scala/org/apache/spark/sql/SubquerySuite.scala | 11 ---------
.../sql/errors/QueryExecutionErrorsSuite.scala | 27 ++++++++++++++++++++++
5 files changed, 37 insertions(+), 14 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json
b/core/src/main/resources/error/error-classes.json
index 1a139c018e8..f6fba105872 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -160,6 +160,9 @@
"MULTI_UDF_INTERFACE_ERROR" : {
"message" : [ "Not allowed to implement multiple UDF interfaces, UDF class
<class>" ]
},
+ "MULTI_VALUE_SUBQUERY_ERROR" : {
+ "message" : [ "more than one row returned by a subquery used as an
expression: <plan>" ]
+ },
"NON_LITERAL_PIVOT_VALUES" : {
"message" : [ "Literal expressions required for pivot values, found
'<expression>'" ],
"sqlState" : "42000"
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 1e664100545..f79b30f0d0f 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -2005,4 +2005,9 @@ object QueryExecutionErrors extends QueryErrorsBase {
new SparkException(errorClass = "INVALID_BUCKET_FILE", messageParameters =
Array(path),
cause = null)
}
+
+ def multipleRowSubqueryError(plan: String): Throwable = {
+ new SparkException(
+ errorClass = "MULTI_VALUE_SUBQUERY_ERROR", messageParameters =
Array(plan), cause = null)
+ }
}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala
index 209b0f79243..c6f5983f243 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/subquery.scala
@@ -25,6 +25,7 @@ import
org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCo
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.catalyst.trees.{LeafLike, UnaryLike}
import org.apache.spark.sql.catalyst.trees.TreePattern._
+import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{BooleanType, DataType}
@@ -79,9 +80,7 @@ case class ScalarSubquery(
def updateResult(): Unit = {
val rows = plan.executeCollect()
if (rows.length > 1) {
- // TODO(SPARK-39167): Throw an exception w/ an error class for multiple
rows from a subquery
- throw new IllegalStateException(
- s"more than one row returned by a subquery used as an
expression:\n$plan")
+ throw QueryExecutionErrors.multipleRowSubqueryError(plan.toString)
}
if (rows.length == 1) {
assert(rows(0).numFields == 1,
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
index 396fca47634..500913fb289 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SubquerySuite.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql
import scala.collection.mutable.ArrayBuffer
-import org.apache.spark.SparkException
import org.apache.spark.sql.catalyst.expressions.SubqueryExpression
import org.apache.spark.sql.catalyst.plans.logical.{Join, LogicalPlan, Sort}
import org.apache.spark.sql.execution.{ColumnarToRowExec,
ExecSubqueryExpression, FileSourceScanExec, InputAdapter, ReusedSubqueryExec,
ScalarSubquery, SubqueryExec, WholeStageCodegenExec}
@@ -146,16 +145,6 @@ class SubquerySuite extends QueryTest with
SharedSparkSession with AdaptiveSpark
)
}
- test("runtime error when the number of rows is greater than 1") {
- val e = intercept[SparkException] {
- sql("select (select a from (select 1 as a union all select 2 as a) t) as
b").collect()
- }
- // TODO(SPARK-39167): Throw an exception w/ an error class for multiple
rows from a subquery
- assert(e.getErrorClass === "INTERNAL_ERROR")
- assert(e.getCause.getMessage.contains(
- "more than one row returned by a subquery used as an expression"))
- }
-
test("uncorrelated scalar subquery on a DataFrame generated query") {
withTempView("subqueryData") {
val df = Seq((1, "one"), (2, "two"), (3, "three")).toDF("key", "value")
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
index bbf6c0dda79..e8d1afddde2 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala
@@ -611,6 +611,33 @@ class QueryExecutionErrorsSuite
matchMsg = true)
}
}
+
+ test("MULTI_VALUE_SUBQUERY_ERROR: " +
+ "more than one row returned by a subquery used as an expression") {
+ checkErrorClass(
+ exception = intercept[SparkException] {
+ sql("select (select a from (select 1 as a union all select 2 as a) t)
as b").collect()
+ },
+ errorClass = "MULTI_VALUE_SUBQUERY_ERROR",
+ msg =
+ """more than one row returned by a subquery used as an expression: """
+
+ """Subquery subquery#\w+, \[id=#\w+\]
+ |\+\- AdaptiveSparkPlan isFinalPlan=true
+ | \+\- == Final Plan ==
+ | Union
+ | :\- \*\(1\) Project \[\w+ AS a#\w+\]
+ | : \+\- \*\(1\) Scan OneRowRelation\[\]
+ | \+\- \*\(2\) Project \[\w+ AS a#\w+\]
+ | \+\- \*\(2\) Scan OneRowRelation\[\]
+ | \+\- == Initial Plan ==
+ | Union
+ | :\- Project \[\w+ AS a#\w+\]
+ | : \+\- Scan OneRowRelation\[\]
+ | \+\- Project \[\w+ AS a#\w+\]
+ | \+\- Scan OneRowRelation\[\]
+ |""".stripMargin,
+ matchMsg = true)
+ }
}
class FakeFileSystemSetPermission extends LocalFileSystem {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]