This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8b967e191b7 [SPARK-43254][SQL] Assign a name to the error
_LEGACY_ERROR_TEMP_2018
8b967e191b7 is described below
commit 8b967e191b755d7f2830c15d382c83ce7aeb69c1
Author: dengziming <[email protected]>
AuthorDate: Thu Sep 21 10:22:37 2023 +0300
[SPARK-43254][SQL] Assign a name to the error _LEGACY_ERROR_TEMP_2018
### What changes were proposed in this pull request?
Assign the name `CLASS_UNSUPPORTED_BY_MAP_OBJECTS` to the legacy error
class `_LEGACY_ERROR_TEMP_2018`.
### Why are the changes needed?
To assign a proper name as a part of the activity in SPARK-37935.
### Does this PR introduce _any_ user-facing change?
Yes, the error message will include the error class name `CLASS_UNSUPPORTED_BY_MAP_OBJECTS` instead of the legacy identifier `_LEGACY_ERROR_TEMP_2018`.
### How was this patch tested?
Add a unit test to produce the error from user code.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #42939 from dengziming/SPARK-43254.
Authored-by: dengziming <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../src/main/resources/error/error-classes.json | 10 +++---
docs/sql-error-conditions.md | 6 ++++
.../sql/catalyst/encoders/ExpressionEncoder.scala | 2 +-
.../spark/sql/errors/QueryExecutionErrors.scala | 2 +-
.../expressions/ObjectExpressionsSuite.scala | 11 +++---
.../scala/org/apache/spark/sql/DatasetSuite.scala | 40 ++++++++++++++++++++--
6 files changed, 57 insertions(+), 14 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-classes.json
b/common/utils/src/main/resources/error/error-classes.json
index d92ccfce5c5..8942d3755e9 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -344,6 +344,11 @@
],
"sqlState" : "22003"
},
+ "CLASS_UNSUPPORTED_BY_MAP_OBJECTS" : {
+ "message" : [
+ "`MapObjects` does not support the class <cls> as resulting collection."
+ ]
+ },
"CODEC_NOT_AVAILABLE" : {
"message" : [
"The codec <codecName> is not available. Consider to set the config
<configKey> to <configVal>."
@@ -4944,11 +4949,6 @@
"not resolved."
]
},
- "_LEGACY_ERROR_TEMP_2018" : {
- "message" : [
- "class `<cls>` is not supported by `MapObjects` as resulting collection."
- ]
- },
"_LEGACY_ERROR_TEMP_2020" : {
"message" : [
"Couldn't find a valid constructor on <cls>."
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index 1df00f72bc9..f6f94efc2b0 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -297,6 +297,12 @@ The value `<value>` of the type `<sourceType>` cannot be
cast to `<targetType>`
Fail to assign a value of `<sourceType>` type to the `<targetType>` type
column or variable `<columnName>` due to an overflow. Use `try_cast` on the
input value to tolerate overflow and return NULL instead.
+### CLASS_UNSUPPORTED_BY_MAP_OBJECTS
+
+SQLSTATE: none assigned
+
+`MapObjects` does not support the class `<cls>` as resulting collection.
+
### CODEC_NOT_AVAILABLE
SQLSTATE: none assigned
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
index ff72b5a0d96..74d7a5e7a67 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
@@ -170,7 +170,7 @@ object ExpressionEncoder {
* Function that deserializes an [[InternalRow]] into an object of type `T`.
This class is not
* thread-safe.
*/
- class Deserializer[T](private val expressions: Seq[Expression])
+ class Deserializer[T](val expressions: Seq[Expression])
extends (InternalRow => T) with Serializable {
@transient
private[this] var constructProjection: Projection = _
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index e14fef1fad7..84472490128 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -422,7 +422,7 @@ private[sql] object QueryExecutionErrors extends
QueryErrorsBase with ExecutionE
def classUnsupportedByMapObjectsError(cls: Class[_]): SparkRuntimeException
= {
new SparkRuntimeException(
- errorClass = "_LEGACY_ERROR_TEMP_2018",
+ errorClass = "CLASS_UNSUPPORTED_BY_MAP_OBJECTS",
messageParameters = Map("cls" -> cls.getName))
}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
index 3a662e68d58..de85d6fe0b7 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
@@ -404,11 +404,12 @@ class ObjectExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
customCollectionClasses.foreach(testMapObjects(collection, _, inputType))
// Unsupported custom collection class
- val errMsg = intercept[RuntimeException] {
- testMapObjects(collection, classOf[scala.collection.Map[Int, Int]],
inputType)
- }.getMessage()
- assert(errMsg.contains("`scala.collection.Map` is not supported by
`MapObjects` " +
- "as resulting collection."))
+ checkError(
+ exception = intercept[SparkRuntimeException] {
+ testMapObjects(collection, classOf[scala.collection.Map[Int, Int]],
inputType)
+ },
+ errorClass = "CLASS_UNSUPPORTED_BY_MAP_OBJECTS",
+ parameters = Map("cls" -> "scala.collection.Map"))
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index e05b545f235..32469534978 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql
import java.io.{Externalizable, ObjectInput, ObjectOutput}
import java.sql.{Date, Timestamp}
+import scala.reflect.ClassTag
import scala.util.Random
import org.apache.hadoop.fs.{Path, PathFilter}
@@ -32,8 +33,9 @@ import org.apache.spark.TestUtils.withListener
import org.apache.spark.internal.config.MAX_RESULT_SIZE
import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum,
ScroogeLikeExample}
-import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, OuterScopes}
-import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
+import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoders,
ExpressionEncoder, OuterScopes}
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.BoxedIntEncoder
+import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode,
GenericRowWithSchema}
import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi}
import org.apache.spark.sql.catalyst.util.sideBySide
import org.apache.spark.sql.execution.{LogicalRDD, RDDScanExec, SQLExecution}
@@ -2561,6 +2563,40 @@ class DatasetSuite extends QueryTest
checkDataset(ds.filter(f(col("_1"))), Tuple1(ValueClass(2)))
}
+
+ test("CLASS_UNSUPPORTED_BY_MAP_OBJECTS when creating dataset") {
+ withSQLConf(
+ // Set CODEGEN_FACTORY_MODE to default value to reproduce
CLASS_UNSUPPORTED_BY_MAP_OBJECTS
+ SQLConf.CODEGEN_FACTORY_MODE.key ->
CodegenObjectFactoryMode.NO_CODEGEN.toString) {
+ // Create our own encoder to cover the default encoder from
spark.implicits._
+ implicit val im: ExpressionEncoder[Array[Int]] = ExpressionEncoder(
+ AgnosticEncoders.IterableEncoder(
+ ClassTag(classOf[Array[Int]]), BoxedIntEncoder, false, false))
+
+ val df = spark.createDataset(Seq(Array(1)))
+ val exception = intercept[org.apache.spark.SparkRuntimeException] {
+ df.collect()
+ }
+ val expressions = im.resolveAndBind(df.queryExecution.logical.output,
+ spark.sessionState.analyzer)
+ .createDeserializer().expressions
+
+ // Expression decoding error
+ checkError(
+ exception = exception,
+ errorClass = "_LEGACY_ERROR_TEMP_2151",
+ parameters = Map(
+ "e" -> exception.getCause.toString(),
+ "expressions" -> expressions.map(
+ _.simpleString(SQLConf.get.maxToStringFields)).mkString("\n"))
+ )
+ // class unsupported by map objects
+ checkError(
+ exception =
exception.getCause.asInstanceOf[org.apache.spark.SparkRuntimeException],
+ errorClass = "CLASS_UNSUPPORTED_BY_MAP_OBJECTS",
+ parameters = Map("cls" -> classOf[Array[Int]].getName))
+ }
+ }
}
class DatasetLargeResultCollectingSuite extends QueryTest
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]