This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 49389cd05e [GLUTEN-11088][VL] Fix Spark4.0 varchar type check suites
(#11202)
49389cd05e is described below
commit 49389cd05ea07356f71bfdfe660410604c1461ea
Author: Jin Chengcheng <[email protected]>
AuthorDate: Thu Nov 27 11:16:41 2025 +0000
[GLUTEN-11088][VL] Fix Spark4.0 varchar type check suites (#11202)
Use one test to make sure the native code is really called, and override the
function assertLengthCheckFailure to assert that the result is correct.
Spark 4.0 did a refactor that moved the test result check to a public function,
so we can reuse the tests by overriding the check function in our test suites.
---
.../gluten/utils/velox/VeloxTestSettings.scala | 12 +--
.../spark/sql/GlutenCharVarcharTestSuite.scala | 115 ++++++---------------
2 files changed, 33 insertions(+), 94 deletions(-)
diff --git
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 07437631f9..ce10ddec49 100644
---
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -713,16 +713,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("InMemoryRelation statistics")
// Extra ColumnarToRow is needed to transform vanilla columnar data to
gluten columnar data.
.exclude("SPARK-37369: Avoid redundant ColumnarToRow transition on
InMemoryTableScan")
- // TODO: fix in Spark-4.0
- // enableSuite[GlutenFileSourceCharVarcharTestSuite]
- // .exclude("length check for input string values: nested in array")
- // .exclude("length check for input string values: nested in array")
- // .exclude("length check for input string values: nested in map key")
- // .exclude("length check for input string values: nested in map value")
- // .exclude("length check for input string values: nested in both map key
and value")
- // .exclude("length check for input string values: nested in array of
struct")
- // .exclude("length check for input string values: nested in array of
array")
- // enableSuite[GlutenDSV2CharVarcharTestSuite]
+ enableSuite[GlutenFileSourceCharVarcharTestSuite]
+ enableSuite[GlutenDSV2CharVarcharTestSuite]
enableSuite[GlutenColumnExpressionSuite]
// Velox raise_error('errMsg') throws a velox_user_error exception with
the message 'errMsg'.
// The final caught Spark exception's getCause().getMessage() contains
'errMsg' but does not
diff --git
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
index ce2f1b465e..ed40918de6 100644
---
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
+++
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
@@ -16,32 +16,41 @@
*/
package org.apache.spark.sql
-import org.apache.spark.SparkException
+import org.apache.spark.{SparkException, SparkRuntimeException, SparkThrowable}
+
+trait GlutenCharVarcharTestSuite extends CharVarcharTestSuite with
GlutenSQLTestsTrait {
+ protected val ERROR_MESSAGE =
+ "Exceeds char/varchar type length limitation: 5"
+
+ protected val VELOX_ERROR_MESSAGE =
+ "Exceeds allowed length limitation: 5"
+
+ override def assertLengthCheckFailure(func: () => Unit): Unit = {
+ val e = intercept[SparkThrowable](func())
+ e match {
+ // Spark throws exception
+ case _: SparkRuntimeException =>
+ checkError(
+ exception = e,
+ condition = "EXCEED_LIMIT_LENGTH",
+ parameters = Map("limit" -> "5")
+ )
+ // Gluten throws exception. but sometimes, Spark exception is wrapped in
GlutenException.
+ case e: SparkException =>
+ assert(e.getMessage.contains(VELOX_ERROR_MESSAGE) ||
e.getMessage.contains(ERROR_MESSAGE))
+ case _ => throw new RuntimeException(s"Unexpected exception: $e")
+ }
+ }
+}
class GlutenFileSourceCharVarcharTestSuite
extends FileSourceCharVarcharTestSuite
- with GlutenSQLTestsTrait {
+ with GlutenCharVarcharTestSuite {
private def testTableWrite(f: String => Unit): Unit = {
withTable("t")(f("char"))
withTable("t")(f("varchar"))
}
- private val ERROR_MESSAGE =
- "Exceeds char/varchar type length limitation: 5"
-
- testGluten("length check for input string values: nested in struct") {
- testTableWrite {
- typeName =>
- sql(s"CREATE TABLE t(c STRUCT<c: $typeName(5)>) USING $format")
- sql("INSERT INTO t SELECT struct(null)")
- checkAnswer(spark.table("t"), Row(Row(null)))
- val e = intercept[RuntimeException] {
- sql("INSERT INTO t SELECT struct('123456')")
- }
- assert(e.getMessage.contains(ERROR_MESSAGE))
- }
- }
-
testGluten("length check for input string values: nested in array") {
testTableWrite {
typeName =>
@@ -51,73 +60,11 @@ class GlutenFileSourceCharVarcharTestSuite
val e = intercept[SparkException] {
sql("INSERT INTO t VALUES (array('a', '123456'))")
}
- assert(e.getMessage.contains(ERROR_MESSAGE))
- }
- }
-
- testGluten("length check for input string values: nested in map key") {
- testTableWrite {
- typeName =>
- sql(s"CREATE TABLE t(c MAP<$typeName(5), STRING>) USING $format")
- val e = intercept[SparkException](sql("INSERT INTO t VALUES
(map('123456', 'a'))"))
- assert(e.getMessage.contains(ERROR_MESSAGE))
- }
- }
-
- testGluten("length check for input string values: nested in map value") {
- testTableWrite {
- typeName =>
- sql(s"CREATE TABLE t(c MAP<STRING, $typeName(5)>) USING $format")
- sql("INSERT INTO t VALUES (map('a', null))")
- checkAnswer(spark.table("t"), Row(Map("a" -> null)))
- val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('a',
'123456'))"))
- assert(e.getMessage.contains(ERROR_MESSAGE))
- }
- }
-
- testGluten("length check for input string values: nested in both map key and
value") {
- testTableWrite {
- typeName =>
- sql(s"CREATE TABLE t(c MAP<$typeName(5), $typeName(5)>) USING $format")
- val e1 = intercept[SparkException](sql("INSERT INTO t VALUES
(map('123456', 'a'))"))
- assert(e1.getMessage.contains(ERROR_MESSAGE))
- val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (map('a',
'123456'))"))
- assert(e2.getMessage.contains(ERROR_MESSAGE))
- }
- }
-
- testGluten("length check for input string values: nested in struct of
array") {
- testTableWrite {
- typeName =>
- sql(s"CREATE TABLE t(c STRUCT<c: ARRAY<$typeName(5)>>) USING $format")
- sql("INSERT INTO t SELECT struct(array(null))")
- checkAnswer(spark.table("t"), Row(Row(Seq(null))))
- val e = intercept[SparkException](sql("INSERT INTO t SELECT
struct(array('123456'))"))
- assert(e.getMessage.contains(ERROR_MESSAGE))
- }
- }
-
- testGluten("length check for input string values: nested in array of
struct") {
- testTableWrite {
- typeName =>
- sql(s"CREATE TABLE t(c ARRAY<STRUCT<c: $typeName(5)>>) USING $format")
- sql("INSERT INTO t VALUES (array(struct(null)))")
- checkAnswer(spark.table("t"), Row(Seq(Row(null))))
- val e = intercept[SparkException](sql("INSERT INTO t VALUES
(array(struct('123456')))"))
- assert(e.getMessage.contains(ERROR_MESSAGE))
- }
- }
-
- testGluten("length check for input string values: nested in array of array")
{
- testTableWrite {
- typeName =>
- sql(s"CREATE TABLE t(c ARRAY<ARRAY<$typeName(5)>>) USING $format")
- sql("INSERT INTO t VALUES (array(array(null)))")
- checkAnswer(spark.table("t"), Row(Seq(Seq(null))))
- val e = intercept[SparkException](sql("INSERT INTO t VALUES
(array(array('123456')))"))
- assert(e.getMessage.contains(ERROR_MESSAGE))
+ assert(e.getMessage.contains(VELOX_ERROR_MESSAGE))
}
}
}
-class GlutenDSV2CharVarcharTestSuite extends DSV2CharVarcharTestSuite with
GlutenSQLTestsTrait {}
+class GlutenDSV2CharVarcharTestSuite
+ extends DSV2CharVarcharTestSuite
+ with GlutenCharVarcharTestSuite {}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]