This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 7e2642db062  [SPARK-42326][SQL] Integrate `_LEGACY_ERROR_TEMP_2099` into `UNSUPPORTED_DATATYPE`
7e2642db062 is described below

commit 7e2642db062cc45c44cbf549d6431bd72915fa17
Author: itholic <haejoon....@databricks.com>
AuthorDate: Thu Feb 16 22:04:17 2023 +0300

    [SPARK-42326][SQL] Integrate `_LEGACY_ERROR_TEMP_2099` into `UNSUPPORTED_DATATYPE`

    ### What changes were proposed in this pull request?

    This PR proposes to integrate `_LEGACY_ERROR_TEMP_2099` into `UNSUPPORTED_DATATYPE`, and also introduces a new error class, `UNSUPPORTED_ARROWTYPE`.

    ### Why are the changes needed?

    We should assign proper names to LEGACY errors.

    ### Does this PR introduce _any_ user-facing change?

    No.

    ### How was this patch tested?

    Updated UT.

    Closes #39979 from itholic/LEGACY_2099.

    Authored-by: itholic <haejoon....@databricks.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
    (cherry picked from commit 9855b137032bf9504dff96eb5bb9951accacac0f)
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 core/src/main/resources/error/error-classes.json            | 11 ++++++-----
 .../org/apache/spark/sql/errors/QueryExecutionErrors.scala  | 13 ++++++++++---
 .../main/scala/org/apache/spark/sql/util/ArrowUtils.scala   |  4 ++--
 .../scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala   |  9 ++++++++-
 .../scala/org/apache/spark/sql/execution/Columnar.scala     |  2 +-
 .../org/apache/spark/sql/execution/arrow/ArrowWriter.scala  |  2 +-
 .../spark/sql/execution/arrow/ArrowConvertersSuite.scala    | 10 ++++++----
 7 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 770223625cf..a0970550d72 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -1567,6 +1567,12 @@
     ],
     "sqlState" : "42703"
   },
+  "UNSUPPORTED_ARROWTYPE" : {
+    "message" : [
+      "Unsupported arrow type <typeName>."
+    ],
+    "sqlState" : "0A000"
+  },
   "UNSUPPORTED_DATATYPE" : {
     "message" : [
       "Unsupported data type <typeName>."
@@ -4094,11 +4100,6 @@
       "Could not compare cost with <cost>."
     ]
   },
-  "_LEGACY_ERROR_TEMP_2099" : {
-    "message" : [
-      "Unsupported data type: <dt>."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_2100" : {
     "message" : [
       "not support type: <dataType>."
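For orientation, here is a minimal, self-contained sketch (not Spark's own implementation) of how an entry like the one above surfaces: the `<typeName>` placeholder in the message template is filled from the `messageParameters` map supplied by the raising helper that this patch adds to `QueryExecutionErrors` further down. The object and method names below are made up for illustration; the template string and parameter value are taken from this diff.

```scala
object ErrorTemplateSketch {
  // Simplified stand-in for Spark's error-message rendering: substitute each
  // <placeholder> in the template with its value from messageParameters.
  def render(template: String, params: Map[String, String]): String =
    params.foldLeft(template) { case (msg, (key, value)) =>
      msg.replace(s"<$key>", value)
    }

  def main(args: Array[String]): Unit = {
    // Template and parameter value as they appear in this patch.
    val msg = render(
      "Unsupported arrow type <typeName>.",
      Map("typeName" -> "Int(8, false)"))
    println(msg) // Unsupported arrow type Int(8, false).
  }
}
```

Spark performs an equivalent substitution internally before attaching the error class and its SQLSTATE (`0A000`) to the thrown `SparkUnsupportedOperationException`.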
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 2bafa2e2c03..17c5b2f4f10 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -25,6 +25,7 @@ import java.time.temporal.ChronoField
 import java.util.concurrent.TimeoutException
 
 import com.fasterxml.jackson.core.{JsonParser, JsonToken}
+import org.apache.arrow.vector.types.pojo.ArrowType
 import org.apache.hadoop.fs.{FileAlreadyExistsException, FileStatus, Path}
 import org.apache.hadoop.fs.permission.FsPermission
 import org.codehaus.commons.compiler.{CompileException, InternalCompilerException}
@@ -1124,10 +1125,16 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
       messageParameters = Map("cost" -> cost))
   }
 
-  def unsupportedDataTypeError(dt: String): SparkUnsupportedOperationException = {
+  def unsupportedArrowTypeError(typeName: ArrowType): SparkUnsupportedOperationException = {
     new SparkUnsupportedOperationException(
-      errorClass = "_LEGACY_ERROR_TEMP_2099",
-      messageParameters = Map("dt" -> dt))
+      errorClass = "UNSUPPORTED_ARROWTYPE",
+      messageParameters = Map("typeName" -> typeName.toString))
+  }
+
+  def unsupportedDataTypeError(typeName: DataType): SparkUnsupportedOperationException = {
+    new SparkUnsupportedOperationException(
+      errorClass = "UNSUPPORTED_DATATYPE",
+      messageParameters = Map("typeName" -> toSQLType(typeName)))
+  }
 
   def notSupportTypeError(dataType: DataType): Throwable = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
index e854eba0383..6c6635bac57 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/util/ArrowUtils.scala
@@ -56,7 +56,7 @@ private[sql] object ArrowUtils {
     case _: YearMonthIntervalType => new ArrowType.Interval(IntervalUnit.YEAR_MONTH)
     case _: DayTimeIntervalType => new ArrowType.Duration(TimeUnit.MICROSECOND)
     case _ =>
-      throw QueryExecutionErrors.unsupportedDataTypeError(dt.catalogString)
+      throw QueryExecutionErrors.unsupportedDataTypeError(dt)
   }
 
   def fromArrowType(dt: ArrowType): DataType = dt match {
@@ -79,7 +79,7 @@ private[sql] object ArrowUtils {
     case ArrowType.Null.INSTANCE => NullType
     case yi: ArrowType.Interval if yi.getUnit == IntervalUnit.YEAR_MONTH => YearMonthIntervalType()
     case di: ArrowType.Duration if di.getUnit == TimeUnit.MICROSECOND => DayTimeIntervalType()
-    case _ => throw QueryExecutionErrors.unsupportedDataTypeError(dt.toString)
+    case _ => throw QueryExecutionErrors.unsupportedArrowTypeError(dt)
   }
 
   /** Maps field from Spark to Arrow. NOTE: timeZoneId required for TimestampType */
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala
index 6dd02afe19b..2f78d03db80 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala
@@ -21,7 +21,7 @@ import java.time.ZoneId
 
 import org.apache.arrow.vector.types.pojo.ArrowType
 
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.{SparkFunSuite, SparkUnsupportedOperationException}
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils.LA
 import org.apache.spark.sql.types._
 
@@ -54,6 +54,13 @@ class ArrowUtilsSuite extends SparkFunSuite {
       roundtrip(TimestampType)
     }
     assert(tsExMsg.getMessage.contains("timezoneId"))
+    checkError(
+      exception = intercept[SparkUnsupportedOperationException] {
+        ArrowUtils.fromArrowType(new ArrowType.Int(8, false))
+      },
+      errorClass = "UNSUPPORTED_ARROWTYPE",
+      parameters = Map("typeName" -> "Int(8, false)")
+    )
   }
 
   test("timestamp") {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
index 684a3f319ab..a051cc26a7d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Columnar.scala
@@ -276,7 +276,7 @@ private object RowToColumnConverter {
       case dt: DecimalType => new DecimalConverter(dt)
       case mt: MapType => MapConverter(getConverterForType(mt.keyType, nullable = false),
         getConverterForType(mt.valueType, mt.valueContainsNull))
-      case unknown => throw QueryExecutionErrors.unsupportedDataTypeError(unknown.toString)
+      case unknown => throw QueryExecutionErrors.unsupportedDataTypeError(unknown)
     }
 
     if (nullable) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
index 34e128a4925..af7126495c5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/arrow/ArrowWriter.scala
@@ -78,7 +78,7 @@ object ArrowWriter {
       case (_: YearMonthIntervalType, vector: IntervalYearVector) => new IntervalYearWriter(vector)
       case (_: DayTimeIntervalType, vector: DurationVector) => new DurationWriter(vector)
       case (dt, _) =>
-        throw QueryExecutionErrors.unsupportedDataTypeError(dt.catalogString)
+        throw QueryExecutionErrors.unsupportedDataTypeError(dt)
     }
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala
index eb33e2e47ca..82e4c970837 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala
@@ -28,7 +28,7 @@ import org.apache.arrow.vector.{VectorLoader, VectorSchemaRoot}
 import org.apache.arrow.vector.ipc.JsonFileReader
 import org.apache.arrow.vector.util.{ByteArrayReadableSeekableByteChannel, Validator}
 
-import org.apache.spark.{SparkException, TaskContext}
+import org.apache.spark.{SparkException, SparkUnsupportedOperationException, TaskContext}
 import org.apache.spark.sql.{DataFrame, Row}
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.UnsafeProjection
@@ -1269,9 +1269,11 @@ class ArrowConvertersSuite extends SharedSparkSession {
     val e = intercept[SparkException] {
       calendarIntervalData.toDF().toArrowBatchRdd.collect()
     }
-
-    assert(e.getCause.isInstanceOf[UnsupportedOperationException])
-    assert(e.getCause.getMessage.contains("Unsupported data type: interval"))
+    checkError(
+      exception = e.getCause.asInstanceOf[SparkUnsupportedOperationException],
+      errorClass = "UNSUPPORTED_DATATYPE",
+      parameters = Map("typeName" -> "\"INTERVAL\"")
+    )
   }
 
   test("test Arrow Validator") {
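One detail worth noting in the updated test above: the expected `typeName` parameter is the quoted SQL name `"INTERVAL"` rather than the old catalog string `interval`, because the new `unsupportedDataTypeError` formats the type through `toSQLType`. The sketch below (assuming `spark-sql` on the classpath) reproduces that formatting with a hypothetical stand-in helper; Spark's real helper lives in `QueryErrorsBase` and may differ in detail.

```scala
import org.apache.spark.sql.types.{CalendarIntervalType, DataType}

object SqlTypeNameSketch {
  // Hypothetical stand-in for the toSQLType formatting used by UNSUPPORTED_DATATYPE:
  // take the type's SQL name and wrap it in double quotes.
  def toSQLTypeSketch(t: DataType): String = "\"" + t.sql + "\""

  def main(args: Array[String]): Unit = {
    // Matches the expectation in the test above: Map("typeName" -> "\"INTERVAL\"")
    println(toSQLTypeSketch(CalendarIntervalType)) // prints "INTERVAL" (quotes included)
  }
}
```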