This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 1439d9b275e [SPARK-38734][SQL] Remove the error class
`INDEX_OUT_OF_BOUNDS`
1439d9b275e is described below
commit 1439d9b275e844b5b595126bc97d2b44f6e859ed
Author: Max Gekk <[email protected]>
AuthorDate: Tue Sep 13 10:54:53 2022 +0300
[SPARK-38734][SQL] Remove the error class `INDEX_OUT_OF_BOUNDS`
### What changes were proposed in this pull request?
In the PR, I propose to remove the error class `INDEX_OUT_OF_BOUNDS` from
`error-classes.json` and the exception `SparkIndexOutOfBoundsException`, and
replace the latter with a `SparkException` w/ the error class `INTERNAL_ERROR`,
because that exception should not be raised in regular cases.
`ArrayDataIndexedSeq` throws the exception from `apply()`, and
`ArrayDataIndexedSeq` can be created only from `ArrayData.toSeq`. The latter
is invoked from two places:
1. The `Slice` expression (or the `slice` function):
https://github.com/apache/spark/blob/443eea97578c41870c343cdb88cf69bfdf27033a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala#L1600-L1601
where any access to the produced array is guarded:
```sql
spark-sql> set spark.sql.ansi.enabled=true;
spark.sql.ansi.enabled true
Time taken: 2.415 seconds, Fetched 1 row(s)
spark-sql> SELECT slice(array(1, 2, 3, 4), 2, 2)[4];
...
org.apache.spark.SparkArrayIndexOutOfBoundsException: [INVALID_ARRAY_INDEX]
The index 4 is out of bounds. The array has 2 elements. Use the SQL function
`get()` to tolerate accessing element at invalid index and return NULL instead.
If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
== SQL(line 1, position 8) ==
SELECT slice(array(1, 2, 3, 4), 2, 2)[4]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
at
org.apache.spark.sql.errors.QueryExecutionErrors$.invalidArrayIndexError(QueryExecutionErrors.scala:239)
at
org.apache.spark.sql.catalyst.expressions.GetArrayItem.nullSafeEval(complexTypeExtractors.scala:271)
```
see
https://github.com/apache/spark/blob/a9bb924480e4953457dad680c15ca346f71a26c8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala#L268-L271
2. `MapObjects.convertToSeq`:
https://github.com/apache/spark/blob/5b96e82ad6a4f5d5e4034d9d7112077159cf5044/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala#L886
where any access to the produced IndexedSeq is guarded via map-way access in
https://github.com/apache/spark/blob/5b96e82ad6a4f5d5e4034d9d7112077159cf5044/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala#L864-L867
### Why are the changes needed?
To improve code maintenance.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
By running the affected test suite:
```
$ build/sbt "core/testOnly *SparkThrowableSuite"
$ build/sbt "test:testOnly *ArrayDataIndexedSeqSuite"
```
Closes #37857 from MaxGekk/rm-INDEX_OUT_OF_BOUNDS.
Authored-by: Max Gekk <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
core/src/main/resources/error/error-classes.json | 6 ------
.../main/scala/org/apache/spark/SparkException.scala | 16 ----------------
.../org/apache/spark/sql/catalyst/util/ArrayData.scala | 4 +++-
.../apache/spark/sql/errors/QueryExecutionErrors.scala | 5 -----
.../sql/catalyst/util/ArrayDataIndexedSeqSuite.scala | 18 ++++++++++--------
5 files changed, 13 insertions(+), 36 deletions(-)
diff --git a/core/src/main/resources/error/error-classes.json
b/core/src/main/resources/error/error-classes.json
index 3e15334931b..29f1f4f0b30 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -239,12 +239,6 @@
}
}
},
- "INDEX_OUT_OF_BOUNDS" : {
- "message" : [
- "Index <indexValue> must be between 0 and the length of the ArrayData."
- ],
- "sqlState" : "22023"
- },
"INTERNAL_ERROR" : {
"message" : [
"<message>"
diff --git a/core/src/main/scala/org/apache/spark/SparkException.scala
b/core/src/main/scala/org/apache/spark/SparkException.scala
index 67aa8cdfcac..aea796685ee 100644
--- a/core/src/main/scala/org/apache/spark/SparkException.scala
+++ b/core/src/main/scala/org/apache/spark/SparkException.scala
@@ -316,22 +316,6 @@ private[spark] class SparkIllegalArgumentException(
override def getQueryContext: Array[QueryContext] = context
}
-/**
- * Index out of bounds exception thrown from Spark with an error class.
- */
-private[spark] class SparkIndexOutOfBoundsException(
- errorClass: String,
- errorSubClass: Option[String] = None,
- messageParameters: Array[String])
- extends IndexOutOfBoundsException(
- SparkThrowableHelper.getMessage(errorClass, errorSubClass.orNull,
messageParameters))
- with SparkThrowable {
-
- override def getMessageParameters: Array[String] = messageParameters
- override def getErrorClass: String = errorClass
- override def getErrorSubClass: String = errorSubClass.orNull
-}
-
/**
* IO exception thrown from Spark with an error class.
*/
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala
index f5497665f2f..51759df901c 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayData.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.util
import scala.reflect.ClassTag
+import org.apache.spark.SparkException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters,
UnsafeArrayData}
import org.apache.spark.sql.errors.QueryExecutionErrors
@@ -200,7 +201,8 @@ class ArrayDataIndexedSeq[T](arrayData: ArrayData,
dataType: DataType) extends I
if (0 <= idx && idx < arrayData.numElements()) {
accessor(arrayData, idx).asInstanceOf[T]
} else {
- throw QueryExecutionErrors.indexOutOfBoundsOfArrayDataError(idx)
+ throw SparkException.internalError(
+ s"Index $idx must be between 0 and the length of the ArrayData.")
}
override def length: Int = arrayData.numElements()
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index f4ec70e81d9..662a10cf3ac 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -1372,11 +1372,6 @@ private[sql] object QueryExecutionErrors extends
QueryErrorsBase {
""".stripMargin.replaceAll("\n", " "))
}
- def indexOutOfBoundsOfArrayDataError(idx: Int): Throwable = {
- new SparkIndexOutOfBoundsException(
- errorClass = "INDEX_OUT_OF_BOUNDS", None, Array(toSQLValue(idx,
IntegerType)))
- }
-
def malformedRecordsDetectedInRecordParsingError(e: BadRecordException):
Throwable = {
new SparkException("Malformed records are detected in record parsing. " +
s"Parse Mode: ${FailFastMode.name}. To process malformed records as null
" +
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala
index 56d2af7cb7e..b015829e672 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/ArrayDataIndexedSeqSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.util
import scala.util.Random
-import org.apache.spark.SparkFunSuite
+import org.apache.spark.{SparkException, SparkFunSuite}
import org.apache.spark.sql.RandomDataGenerator
import org.apache.spark.sql.catalyst.encoders.{ExamplePointUDT, RowEncoder}
import org.apache.spark.sql.catalyst.expressions.{SafeProjection,
UnsafeProjection}
@@ -53,13 +53,15 @@ class ArrayDataIndexedSeqSuite extends SparkFunSuite {
}
}
- intercept[IndexOutOfBoundsException] {
- seq(-1)
- }.getMessage().contains("must be between 0 and the length of the
ArrayData.")
-
- intercept[IndexOutOfBoundsException] {
- seq(seq.length)
- }.getMessage().contains("must be between 0 and the length of the
ArrayData.")
+ Seq(-1, seq.length).foreach { index =>
+ checkError(
+ exception = intercept[SparkException] {
+ seq(index)
+ },
+ errorClass = "INTERNAL_ERROR",
+ parameters = Map(
+ "message" -> s"Index $index must be between 0 and the length of the
ArrayData."))
+ }
}
private def testArrayData(): Unit = {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]