This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 6f189c7cb [VL] Support Spark assert_true function (#6329)
6f189c7cb is described below
commit 6f189c7cbae1a7d6cf80ff7f0a96afb5299a804f
Author: 高阳阳 <[email protected]>
AuthorDate: Thu Jul 11 21:19:37 2024 +0800
[VL] Support Spark assert_true function (#6329)
---
.../org/apache/gluten/utils/CHExpressionUtil.scala | 5 +-
.../backendsapi/velox/VeloxSparkPlanExecApi.scala | 3 -
.../execution/ScalarFunctionsValidateSuite.scala | 14 +++
.../gluten/expression/ExpressionMappings.scala | 3 +
.../resources/sql-tests/inputs/misc-functions.sql | 22 ++++
.../sql-tests/results/misc-functions.sql.out | 137 +++++++++++++++++++++
.../gluten/utils/velox/VeloxTestSettings.scala | 5 +
.../spark/sql/GlutenColumnExpressionSuite.scala | 51 +++++++-
.../apache/spark/sql/GlutenSQLQueryTestSuite.scala | 45 ++++++-
.../resources/sql-tests/inputs/misc-functions.sql | 22 ++++
.../sql-tests/results/misc-functions.sql.out | 137 +++++++++++++++++++++
.../gluten/utils/velox/VeloxTestSettings.scala | 5 +
.../spark/sql/GlutenColumnExpressionSuite.scala | 51 +++++++-
.../apache/spark/sql/GlutenSQLQueryTestSuite.scala | 45 ++++++-
.../resources/sql-tests/inputs/misc-functions.sql | 22 ++++
.../sql-tests/results/misc-functions.sql.out | 134 ++++++++++++++++++++
.../gluten/utils/velox/VeloxTestSettings.scala | 5 +
.../spark/sql/GlutenColumnExpressionSuite.scala | 51 +++++++-
.../apache/spark/sql/GlutenSQLQueryTestSuite.scala | 52 +++++++-
.../resources/sql-tests/inputs/misc-functions.sql | 22 ++++
.../sql-tests/results/misc-functions.sql.out | 134 ++++++++++++++++++++
.../gluten/utils/velox/VeloxTestSettings.scala | 5 +
.../spark/sql/GlutenColumnExpressionSuite.scala | 51 +++++++-
.../apache/spark/sql/GlutenSQLQueryTestSuite.scala | 52 +++++++-
.../apache/gluten/expression/ExpressionNames.scala | 1 +
25 files changed, 1062 insertions(+), 12 deletions(-)
diff --git
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index ac03a7a5b..d65de1cea 100644
---
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -215,6 +215,9 @@ object CHExpressionUtil {
UNIX_MICROS -> DefaultValidator(),
TIMESTAMP_MILLIS -> DefaultValidator(),
TIMESTAMP_MICROS -> DefaultValidator(),
- STACK -> DefaultValidator()
+ STACK -> DefaultValidator(),
+ TRANSFORM_KEYS -> DefaultValidator(),
+ TRANSFORM_VALUES -> DefaultValidator(),
+ RAISE_ERROR -> DefaultValidator()
)
}
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index e13ebd971..2b9d01738 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -22,7 +22,6 @@ import org.apache.gluten.datasource.ArrowConvertorRule
import org.apache.gluten.exception.GlutenNotSupportException
import org.apache.gluten.execution._
import org.apache.gluten.expression._
-import org.apache.gluten.expression.ExpressionNames.{TRANSFORM_KEYS,
TRANSFORM_VALUES}
import org.apache.gluten.expression.aggregate.{HLLAdapter,
VeloxBloomFilterAggregate, VeloxCollectList, VeloxCollectSet}
import org.apache.gluten.extension._
import org.apache.gluten.extension.columnar.FallbackTags
@@ -835,8 +834,6 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
Sig[VeloxCollectSet](ExpressionNames.COLLECT_SET),
Sig[VeloxBloomFilterMightContain](ExpressionNames.MIGHT_CONTAIN),
Sig[VeloxBloomFilterAggregate](ExpressionNames.BLOOM_FILTER_AGG),
- Sig[TransformKeys](TRANSFORM_KEYS),
- Sig[TransformValues](TRANSFORM_VALUES),
// For test purpose.
Sig[VeloxDummyExpression](VeloxDummyExpression.VELOX_DUMMY_EXPRESSION)
)
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index e81c956fe..fc3bf320e 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -16,6 +16,7 @@
*/
package org.apache.gluten.execution
+import org.apache.spark.SparkException
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.types._
@@ -663,6 +664,19 @@ class ScalarFunctionsValidateSuite extends
FunctionsValidateTest {
}
}
+ test("Test raise_error, assert_true function") {
+ runQueryAndCompare("""SELECT assert_true(l_orderkey >= 1), l_orderkey
+ | from lineitem limit 100""".stripMargin) {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ val e = intercept[SparkException] {
+ sql("""SELECT assert_true(l_orderkey >= 100), l_orderkey from
+ | lineitem limit 100""".stripMargin).collect()
+ }
+ assert(e.getCause.isInstanceOf[RuntimeException])
+ assert(e.getMessage.contains("l_orderkey"))
+ }
+
test("Test E function") {
runQueryAndCompare("""SELECT E() from lineitem limit 100""".stripMargin) {
checkGlutenOperatorMatch[ProjectExecTransformer]
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index 51e78a97e..77e85b354 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -261,6 +261,8 @@ object ExpressionMappings {
Sig[MapEntries](MAP_ENTRIES),
Sig[MapZipWith](MAP_ZIP_WITH),
Sig[StringToMap](STR_TO_MAP),
+ Sig[TransformKeys](TRANSFORM_KEYS),
+ Sig[TransformValues](TRANSFORM_VALUES),
// Struct functions
Sig[GetStructField](GET_STRUCT_FIELD),
Sig[CreateNamedStruct](NAMED_STRUCT),
@@ -284,6 +286,7 @@ object ExpressionMappings {
Sig[SparkPartitionID](SPARK_PARTITION_ID),
Sig[WidthBucket](WIDTH_BUCKET),
Sig[ReplicateRows](REPLICATE_ROWS),
+ Sig[RaiseError](RAISE_ERROR),
// Decimal
Sig[UnscaledValue](UNSCALED_VALUE),
// Generator function
diff --git
a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql
b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql
new file mode 100644
index 000000000..907ff3300
--- /dev/null
+++ b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql
@@ -0,0 +1,22 @@
+-- test for misc functions
+
+-- typeof
+select typeof(null);
+select typeof(true);
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L);
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2);
+select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'),
typeof(interval '23 days');
+select typeof(x'ABCD'), typeof('SPARK');
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1,
'b', 'spark'));
+
+-- Spark-32793: Rewrite AssertTrue with RaiseError
+SELECT assert_true(true), assert_true(boolean(1));
+SELECT assert_true(false);
+SELECT assert_true(boolean(0));
+SELECT assert_true(null);
+SELECT assert_true(boolean(null));
+SELECT assert_true(false, 'custom error message');
+
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v);
+SELECT raise_error('error message');
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc;
diff --git
a/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out
b/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out
new file mode 100644
index 000000000..6985233c3
--- /dev/null
+++
b/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out
@@ -0,0 +1,137 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 16
+
+
+-- !query
+select typeof(null)
+-- !query schema
+struct<typeof(NULL):string>
+-- !query output
+void
+
+
+-- !query
+select typeof(true)
+-- !query schema
+struct<typeof(true):string>
+-- !query output
+boolean
+
+
+-- !query
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L)
+-- !query schema
+struct<typeof(1):string,typeof(1):string,typeof(1):string,typeof(1):string>
+-- !query output
+tinyint smallint int bigint
+
+
+-- !query
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2)
+-- !query schema
+struct<typeof(CAST(1.0 AS FLOAT)):string,typeof(1.0):string,typeof(1.2):string>
+-- !query output
+float double decimal(2,1)
+
+
+-- !query
+select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'),
typeof(interval '23 days')
+-- !query schema
+struct<typeof(DATE '1986-05-23'):string,typeof(TIMESTAMP '1986-05-23
00:00:00'):string,typeof(INTERVAL '23' DAY):string>
+-- !query output
+date timestamp interval day
+
+
+-- !query
+select typeof(x'ABCD'), typeof('SPARK')
+-- !query schema
+struct<typeof(X'ABCD'):string,typeof(SPARK):string>
+-- !query output
+binary string
+
+
+-- !query
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1,
'b', 'spark'))
+-- !query schema
+struct<typeof(array(1, 2)):string,typeof(map(1,
2)):string,typeof(named_struct(a, 1, b, spark)):string>
+-- !query output
+array<int> map<int,int> struct<a:int,b:string>
+
+
+-- !query
+SELECT assert_true(true), assert_true(boolean(1))
+-- !query schema
+struct<assert_true(true, 'true' is not true!):void,assert_true(1, 'cast(1 as
boolean)' is not true!):void>
+-- !query output
+NULL NULL
+
+
+-- !query
+SELECT assert_true(false)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'false' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(0))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(0 as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'null' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(null))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(null as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(false, 'custom error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+custom error message
+
+
+-- !query
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT raise_error('error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+error message
+
+
+-- !query
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+too big: 8
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index a17f72de3..d5e8df638 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -296,6 +296,11 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("sliding range between with aggregation")
.exclude("store and retrieve column stats in different time zones")
enableSuite[GlutenColumnExpressionSuite]
+ // Velox raise_error('errMsg') throws a velox_user_error exception with
the message 'errMsg'.
+ // The final caught Spark exception's getCause().getMessage() contains
'errMsg' but does not
+ // equal 'errMsg' exactly. The following two tests will be skipped and
overridden in Gluten.
+ .exclude("raise_error")
+ .exclude("assert_true")
enableSuite[GlutenDataFrameImplicitsSuite]
enableSuite[GlutenGeneratorFunctionSuite]
enableSuite[GlutenDataFrameTimeWindowingSuite]
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
index da22e60f9..437cef292 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
@@ -16,11 +16,60 @@
*/
package org.apache.spark.sql
+import org.apache.spark.SparkException
import org.apache.spark.sql.execution.ProjectExec
-import org.apache.spark.sql.functions.{expr, input_file_name}
+import org.apache.spark.sql.functions.{assert_true, expr, input_file_name,
lit, raise_error}
class GlutenColumnExpressionSuite extends ColumnExpressionSuite with
GlutenSQLTestsTrait {
import testImplicits._
+ testGluten("raise_error") {
+ val strDf = Seq(("hello")).toDF("a")
+
+ val e1 = intercept[SparkException] {
+ strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect()
+ }
+ assert(e1.getCause.isInstanceOf[RuntimeException])
+
+ val e2 = intercept[SparkException] {
+ strDf.select(raise_error($"a")).collect()
+ }
+ assert(e2.getCause.isInstanceOf[RuntimeException])
+ assert(e2.getCause.getMessage contains "hello")
+ }
+
+ testGluten("assert_true") {
+ // assert_true(condition, errMsgCol)
+ val booleanDf = Seq((true), (false)).toDF("cond")
+ checkAnswer(
+ booleanDf.filter("cond = true").select(assert_true($"cond")),
+ Row(null) :: Nil
+ )
+ val e1 = intercept[SparkException] {
+ booleanDf.select(assert_true($"cond",
lit(null.asInstanceOf[String]))).collect()
+ }
+ assert(e1.getCause.isInstanceOf[RuntimeException])
+
+ val nullDf = Seq(("first row", None), ("second row",
Some(true))).toDF("n", "cond")
+ checkAnswer(
+ nullDf.filter("cond = true").select(assert_true($"cond", $"cond")),
+ Row(null) :: Nil
+ )
+ val e2 = intercept[SparkException] {
+ nullDf.select(assert_true($"cond", $"n")).collect()
+ }
+ assert(e2.getCause.isInstanceOf[RuntimeException])
+ assert(e2.getCause.getMessage contains "first row")
+
+ // assert_true(condition)
+ val intDf = Seq((0, 1)).toDF("a", "b")
+ checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil)
+ val e3 = intercept[SparkException] {
+ intDf.select(assert_true($"a" > $"b")).collect()
+ }
+ assert(e3.getCause.isInstanceOf[RuntimeException])
+ assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!")
+ }
+
testGluten(
"input_file_name, input_file_block_start and input_file_block_length " +
"should fall back if scan falls back") {
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 4b75ce13c..4fbd89bda 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -17,9 +17,10 @@
package org.apache.spark.sql
import org.apache.gluten.GlutenConfig
+import org.apache.gluten.exception.GlutenException
import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils,
SystemParameters}
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -39,6 +40,7 @@ import java.util.Locale
import scala.collection.mutable.ArrayBuffer
import scala.sys.process.{Process, ProcessLogger}
import scala.util.Try
+import scala.util.control.NonFatal
/**
* End-to-end test cases for SQL queries.
@@ -761,4 +763,45 @@ class GlutenSQLQueryTestSuite
super.afterAll()
}
}
+
+ /**
+ * This method handles exceptions that occurred during query execution as they
may need special care to
+ * become comparable to the expected output.
+ *
+ * @param result
+ * a function that returns a pair of schema and output
+ */
+ override protected def handleExceptions(
+ result: => (String, Seq[String])): (String, Seq[String]) = {
+ try {
+ result
+ } catch {
+ case a: AnalysisException =>
+ // Do not output the logical plan tree which contains expression IDs.
+ // Also implement a crude way of masking expression IDs in the error
message
+ // with a generic pattern "#x".
+ val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage
+ (emptySchema, Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x")))
+ case s: SparkException if s.getCause != null =>
+ // For a runtime exception, it is hard to match because its message
contains
+ // information of stage, task ID, etc.
+ // To make result matching simpler, here we match the cause of the
exception if it exists.
+ s.getCause match {
+ case e: GlutenException =>
+ val reasonPattern = "Reason: (.*)".r
+ val reason =
reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1))
+
+ reason match {
+ case Some(r) =>
+ (emptySchema, Seq(e.getClass.getName, r))
+ case None => (emptySchema, Seq())
+ }
+ case cause =>
+ (emptySchema, Seq(cause.getClass.getName, cause.getMessage))
+ }
+ case NonFatal(e) =>
+ // If there is an exception, put the exception class followed by the
message.
+ (emptySchema, Seq(e.getClass.getName, e.getMessage))
+ }
+ }
}
diff --git
a/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql
b/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql
new file mode 100644
index 000000000..907ff3300
--- /dev/null
+++ b/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql
@@ -0,0 +1,22 @@
+-- test for misc functions
+
+-- typeof
+select typeof(null);
+select typeof(true);
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L);
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2);
+select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'),
typeof(interval '23 days');
+select typeof(x'ABCD'), typeof('SPARK');
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1,
'b', 'spark'));
+
+-- Spark-32793: Rewrite AssertTrue with RaiseError
+SELECT assert_true(true), assert_true(boolean(1));
+SELECT assert_true(false);
+SELECT assert_true(boolean(0));
+SELECT assert_true(null);
+SELECT assert_true(boolean(null));
+SELECT assert_true(false, 'custom error message');
+
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v);
+SELECT raise_error('error message');
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc;
diff --git
a/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out
b/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out
new file mode 100644
index 000000000..6985233c3
--- /dev/null
+++
b/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out
@@ -0,0 +1,137 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 16
+
+
+-- !query
+select typeof(null)
+-- !query schema
+struct<typeof(NULL):string>
+-- !query output
+void
+
+
+-- !query
+select typeof(true)
+-- !query schema
+struct<typeof(true):string>
+-- !query output
+boolean
+
+
+-- !query
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L)
+-- !query schema
+struct<typeof(1):string,typeof(1):string,typeof(1):string,typeof(1):string>
+-- !query output
+tinyint smallint int bigint
+
+
+-- !query
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2)
+-- !query schema
+struct<typeof(CAST(1.0 AS FLOAT)):string,typeof(1.0):string,typeof(1.2):string>
+-- !query output
+float double decimal(2,1)
+
+
+-- !query
+select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'),
typeof(interval '23 days')
+-- !query schema
+struct<typeof(DATE '1986-05-23'):string,typeof(TIMESTAMP '1986-05-23
00:00:00'):string,typeof(INTERVAL '23' DAY):string>
+-- !query output
+date timestamp interval day
+
+
+-- !query
+select typeof(x'ABCD'), typeof('SPARK')
+-- !query schema
+struct<typeof(X'ABCD'):string,typeof(SPARK):string>
+-- !query output
+binary string
+
+
+-- !query
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1,
'b', 'spark'))
+-- !query schema
+struct<typeof(array(1, 2)):string,typeof(map(1,
2)):string,typeof(named_struct(a, 1, b, spark)):string>
+-- !query output
+array<int> map<int,int> struct<a:int,b:string>
+
+
+-- !query
+SELECT assert_true(true), assert_true(boolean(1))
+-- !query schema
+struct<assert_true(true, 'true' is not true!):void,assert_true(1, 'cast(1 as
boolean)' is not true!):void>
+-- !query output
+NULL NULL
+
+
+-- !query
+SELECT assert_true(false)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'false' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(0))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(0 as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'null' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(null))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(null as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(false, 'custom error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+custom error message
+
+
+-- !query
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT raise_error('error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+error message
+
+
+-- !query
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+too big: 8
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index ae3e7c7b8..fcc2bd343 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -950,6 +950,11 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenFileSourceCharVarcharTestSuite]
enableSuite[GlutenDSV2CharVarcharTestSuite]
enableSuite[GlutenColumnExpressionSuite]
+ // Velox raise_error('errMsg') throws a velox_user_error exception with
the message 'errMsg'.
+ // The final caught Spark exception's getCause().getMessage() contains
'errMsg' but does not
+ // equal 'errMsg' exactly. The following two tests will be skipped and
overridden in Gluten.
+ .exclude("raise_error")
+ .exclude("assert_true")
enableSuite[GlutenComplexTypeSuite]
enableSuite[GlutenConfigBehaviorSuite]
// Will be fixed by cleaning up ColumnarShuffleExchangeExec.
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
index da22e60f9..437cef292 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
@@ -16,11 +16,60 @@
*/
package org.apache.spark.sql
+import org.apache.spark.SparkException
import org.apache.spark.sql.execution.ProjectExec
-import org.apache.spark.sql.functions.{expr, input_file_name}
+import org.apache.spark.sql.functions.{assert_true, expr, input_file_name,
lit, raise_error}
class GlutenColumnExpressionSuite extends ColumnExpressionSuite with
GlutenSQLTestsTrait {
import testImplicits._
+ testGluten("raise_error") {
+ val strDf = Seq(("hello")).toDF("a")
+
+ val e1 = intercept[SparkException] {
+ strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect()
+ }
+ assert(e1.getCause.isInstanceOf[RuntimeException])
+
+ val e2 = intercept[SparkException] {
+ strDf.select(raise_error($"a")).collect()
+ }
+ assert(e2.getCause.isInstanceOf[RuntimeException])
+ assert(e2.getCause.getMessage contains "hello")
+ }
+
+ testGluten("assert_true") {
+ // assert_true(condition, errMsgCol)
+ val booleanDf = Seq((true), (false)).toDF("cond")
+ checkAnswer(
+ booleanDf.filter("cond = true").select(assert_true($"cond")),
+ Row(null) :: Nil
+ )
+ val e1 = intercept[SparkException] {
+ booleanDf.select(assert_true($"cond",
lit(null.asInstanceOf[String]))).collect()
+ }
+ assert(e1.getCause.isInstanceOf[RuntimeException])
+
+ val nullDf = Seq(("first row", None), ("second row",
Some(true))).toDF("n", "cond")
+ checkAnswer(
+ nullDf.filter("cond = true").select(assert_true($"cond", $"cond")),
+ Row(null) :: Nil
+ )
+ val e2 = intercept[SparkException] {
+ nullDf.select(assert_true($"cond", $"n")).collect()
+ }
+ assert(e2.getCause.isInstanceOf[RuntimeException])
+ assert(e2.getCause.getMessage contains "first row")
+
+ // assert_true(condition)
+ val intDf = Seq((0, 1)).toDF("a", "b")
+ checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil)
+ val e3 = intercept[SparkException] {
+ intDf.select(assert_true($"a" > $"b")).collect()
+ }
+ assert(e3.getCause.isInstanceOf[RuntimeException])
+ assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!")
+ }
+
testGluten(
"input_file_name, input_file_block_start and input_file_block_length " +
"should fall back if scan falls back") {
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 4536aa540..6e2a9efa8 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -17,9 +17,10 @@
package org.apache.spark.sql
import org.apache.gluten.GlutenConfig
+import org.apache.gluten.exception.GlutenException
import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils,
SystemParameters}
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkException}
import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -39,6 +40,7 @@ import java.util.Locale
import scala.collection.mutable.ArrayBuffer
import scala.sys.process.{Process, ProcessLogger}
import scala.util.Try
+import scala.util.control.NonFatal
/**
* End-to-end test cases for SQL queries.
@@ -761,4 +763,45 @@ class GlutenSQLQueryTestSuite
super.afterAll()
}
}
+
+ /**
+ * This method handles exceptions that occurred during query execution as they
may need special care to
+ * become comparable to the expected output.
+ *
+ * @param result
+ * a function that returns a pair of schema and output
+ */
+ override protected def handleExceptions(
+ result: => (String, Seq[String])): (String, Seq[String]) = {
+ try {
+ result
+ } catch {
+ case a: AnalysisException =>
+ // Do not output the logical plan tree which contains expression IDs.
+ // Also implement a crude way of masking expression IDs in the error
message
+ // with a generic pattern "#x".
+ val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage
+ (emptySchema, Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x")))
+ case s: SparkException if s.getCause != null =>
+ // For a runtime exception, it is hard to match because its message
contains
+ // information of stage, task ID, etc.
+ // To make result matching simpler, here we match the cause of the
exception if it exists.
+ val cause = s.getCause
+ cause match {
+ case e: GlutenException =>
+ val reasonPattern = "Reason: (.*)".r
+ val reason =
reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1))
+
+ reason match {
+ case Some(r) =>
+ (emptySchema, Seq(e.getClass.getName, r))
+ case None => (emptySchema, Seq())
+ }
+ case _ => (emptySchema, Seq(cause.getClass.getName,
cause.getMessage))
+ }
+ case NonFatal(e) =>
+ // If there is an exception, put the exception class followed by the
message.
+ (emptySchema, Seq(e.getClass.getName, e.getMessage))
+ }
+ }
}
diff --git
a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql
b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql
new file mode 100644
index 000000000..907ff3300
--- /dev/null
+++ b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql
@@ -0,0 +1,22 @@
+-- test for misc functions
+
+-- typeof
+select typeof(null);
+select typeof(true);
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L);
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2);
+select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'),
typeof(interval '23 days');
+select typeof(x'ABCD'), typeof('SPARK');
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1,
'b', 'spark'));
+
+-- Spark-32793: Rewrite AssertTrue with RaiseError
+SELECT assert_true(true), assert_true(boolean(1));
+SELECT assert_true(false);
+SELECT assert_true(boolean(0));
+SELECT assert_true(null);
+SELECT assert_true(boolean(null));
+SELECT assert_true(false, 'custom error message');
+
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v);
+SELECT raise_error('error message');
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc;
diff --git
a/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out
b/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out
new file mode 100644
index 000000000..d6d1289a5
--- /dev/null
+++
b/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out
@@ -0,0 +1,134 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+select typeof(null)
+-- !query schema
+struct<typeof(NULL):string>
+-- !query output
+void
+
+
+-- !query
+select typeof(true)
+-- !query schema
+struct<typeof(true):string>
+-- !query output
+boolean
+
+
+-- !query
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L)
+-- !query schema
+struct<typeof(1):string,typeof(1):string,typeof(1):string,typeof(1):string>
+-- !query output
+tinyint smallint int bigint
+
+
+-- !query
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2)
+-- !query schema
+struct<typeof(CAST(1.0 AS FLOAT)):string,typeof(1.0):string,typeof(1.2):string>
+-- !query output
+float double decimal(2,1)
+
+
+-- !query
+select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'),
typeof(interval '23 days')
+-- !query schema
+struct<typeof(DATE '1986-05-23'):string,typeof(TIMESTAMP '1986-05-23
00:00:00'):string,typeof(INTERVAL '23' DAY):string>
+-- !query output
+date timestamp interval day
+
+
+-- !query
+select typeof(x'ABCD'), typeof('SPARK')
+-- !query schema
+struct<typeof(X'ABCD'):string,typeof(SPARK):string>
+-- !query output
+binary string
+
+
+-- !query
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1,
'b', 'spark'))
+-- !query schema
+struct<typeof(array(1, 2)):string,typeof(map(1,
2)):string,typeof(named_struct(a, 1, b, spark)):string>
+-- !query output
+array<int> map<int,int> struct<a:int,b:string>
+
+
+-- !query
+SELECT assert_true(true), assert_true(boolean(1))
+-- !query schema
+struct<assert_true(true, 'true' is not true!):void,assert_true(1, 'cast(1 as
boolean)' is not true!):void>
+-- !query output
+NULL NULL
+
+
+-- !query
+SELECT assert_true(false)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'false' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(0))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(0 as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'null' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(null))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(null as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(false, 'custom error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+custom error message
+
+
+-- !query
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT raise_error('error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+error message
+
+
+-- !query
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+too big: 8
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 0da19922f..57346f493 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -955,6 +955,11 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenFileSourceCharVarcharTestSuite]
enableSuite[GlutenDSV2CharVarcharTestSuite]
enableSuite[GlutenColumnExpressionSuite]
+ // Velox raise_error('errMsg') throws a velox_user_error exception with
the message 'errMsg'.
+ // The final caught Spark exception's getCause().getMessage() contains
'errMsg' but does not
+ // equal 'errMsg' exactly. The following two tests will be skipped and
overridden in Gluten.
+ .exclude("raise_error")
+ .exclude("assert_true")
enableSuite[GlutenComplexTypeSuite]
enableSuite[GlutenConfigBehaviorSuite]
// Will be fixed by cleaning up ColumnarShuffleExchangeExec.
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
index da22e60f9..437cef292 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
@@ -16,11 +16,60 @@
*/
package org.apache.spark.sql
+import org.apache.spark.SparkException
import org.apache.spark.sql.execution.ProjectExec
-import org.apache.spark.sql.functions.{expr, input_file_name}
+import org.apache.spark.sql.functions.{assert_true, expr, input_file_name,
lit, raise_error}
class GlutenColumnExpressionSuite extends ColumnExpressionSuite with
GlutenSQLTestsTrait {
import testImplicits._
+  // Gluten replacement for the vanilla "raise_error" test. Only a substring match is made on the
+  // message because the natively raised error contains the text without being equal to it.
+  testGluten("raise_error") {
+    val input = Seq("hello").toDF("a")
+
+    // A null message must still fail the query with a runtime error.
+    val nullMessageFailure = intercept[SparkException] {
+      input.select(raise_error(lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(nullMessageFailure.getCause.isInstanceOf[RuntimeException])
+
+    // A message taken from a column must show up in the cause's message.
+    val columnMessageFailure = intercept[SparkException] {
+      input.select(raise_error($"a")).collect()
+    }
+    val columnMessageCause = columnMessageFailure.getCause
+    assert(columnMessageCause.isInstanceOf[RuntimeException])
+    assert(columnMessageCause.getMessage.contains("hello"))
+  }
+
+  // Gluten replacement for the vanilla "assert_true" test. Failure messages are checked with a
+  // substring match rather than equality.
+  testGluten("assert_true") {
+    val passing = Seq(Row(null))
+
+    // assert_true(condition, errMsgCol) over a plain boolean column
+    val flags = Seq(true, false).toDF("cond")
+    checkAnswer(flags.filter("cond = true").select(assert_true($"cond")), passing)
+    val nullMessageFailure = intercept[SparkException] {
+      flags.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(nullMessageFailure.getCause.isInstanceOf[RuntimeException])
+
+    // assert_true(condition, errMsgCol) where the condition itself may be null
+    val nullableFlags = Seq(("first row", None), ("second row", Some(true))).toDF("n", "cond")
+    checkAnswer(
+      nullableFlags.filter("cond = true").select(assert_true($"cond", $"cond")),
+      passing)
+    val nullConditionFailure = intercept[SparkException] {
+      nullableFlags.select(assert_true($"cond", $"n")).collect()
+    }
+    val nullConditionCause = nullConditionFailure.getCause
+    assert(nullConditionCause.isInstanceOf[RuntimeException])
+    assert(nullConditionCause.getMessage.contains("first row"))
+
+    // assert_true(condition) with the default error message
+    val pair = Seq((0, 1)).toDF("a", "b")
+    checkAnswer(pair.select(assert_true($"a" < $"b")), passing)
+    val defaultMessageFailure = intercept[SparkException] {
+      pair.select(assert_true($"a" > $"b")).collect()
+    }
+    val defaultMessageCause = defaultMessageFailure.getCause
+    assert(defaultMessageCause.isInstanceOf[RuntimeException])
+    assert(defaultMessageCause.getMessage.contains("'('a > 'b)' is not true!"))
+  }
+
testGluten(
"input_file_name, input_file_block_start and input_file_block_length " +
"should fall back if scan falls back") {
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 0ea1f13ec..8a291990e 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -17,9 +17,12 @@
package org.apache.spark.sql
import org.apache.gluten.GlutenConfig
+import org.apache.gluten.exception.GlutenException
import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils,
SystemParameters}
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkException, SparkThrowable}
+import org.apache.spark.ErrorMessageFormat.MINIMAL
+import org.apache.spark.SparkThrowableHelper.getMessage
import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -40,6 +43,7 @@ import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.sys.process.{Process, ProcessLogger}
import scala.util.Try
+import scala.util.control.NonFatal
/**
* End-to-end test cases for SQL queries.
@@ -781,4 +785,50 @@ class GlutenSQLQueryTestSuite
super.afterAll()
}
}
+
+  /**
+   * Runs `result` and turns any non-fatal failure into a `(schema, output)` pair, so that a
+   * failing query can be compared against the expected output like a regular result.
+   *
+   * A failure is rendered with `emptySchema` and an output of `Seq(exception class name,
+   * message)`. For a [[SparkException]] with a cause, the cause is rendered instead of the
+   * wrapper. For a [[GlutenException]] cause, only the text after the "Reason: " marker is kept
+   * when the marker is present; otherwise the full message is used.
+   *
+   * @param result
+   *   a by-name computation that returns a pair of schema and output
+   * @return
+   *   the pair produced by `result`, or `emptySchema` together with the rendered failure
+   */
+  override protected def handleExceptions(
+      result: => (String, Seq[String])): (String, Seq[String]) = {
+    val format = MINIMAL
+    try {
+      result
+    } catch {
+      case e: SparkThrowable with Throwable if e.getErrorClass != null =>
+        (emptySchema, Seq(e.getClass.getName, getMessage(e, format)))
+      case a: AnalysisException =>
+        // Do not output the logical plan tree which contains expression IDs.
+        // Also implement a crude way of masking expression IDs in the error message
+        // by rewriting every "#<digits>" to the generic "#x".
+        (emptySchema, Seq(a.getClass.getName, a.getSimpleMessage.replaceAll("#\\d+", "#x")))
+      case s: SparkException if s.getCause != null =>
+        // For a runtime exception, it is hard to match because its message contains
+        // information of stage, task ID, etc.
+        // To make result matching simpler, here we match the cause of the exception if it exists.
+        s.getCause match {
+          case e: SparkThrowable with Throwable if e.getErrorClass != null =>
+            (emptySchema, Seq(e.getClass.getName, getMessage(e, format)))
+          case e: GlutenException =>
+            // Keep only the text following "Reason: " on that line. Option(...) guards against
+            // a null message, and a message without the marker falls back to the full text
+            // instead of producing an empty output that would hide the failure.
+            val reasonPattern = "Reason: (.*)".r
+            val reason = Option(e.getMessage)
+              .flatMap(msg => reasonPattern.findFirstMatchIn(msg))
+              .map(_.group(1))
+            (emptySchema, Seq(e.getClass.getName, reason.getOrElse(e.getMessage)))
+
+          case cause =>
+            (emptySchema, Seq(cause.getClass.getName, cause.getMessage))
+        }
+      case NonFatal(e) =>
+        // If there is an exception, put the exception class followed by the message.
+        (emptySchema, Seq(e.getClass.getName, e.getMessage))
+    }
+  }
}
diff --git
a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql
b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql
new file mode 100644
index 000000000..907ff3300
--- /dev/null
+++ b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql
@@ -0,0 +1,22 @@
+-- test for misc functions
+
+-- typeof
+select typeof(null);
+select typeof(true);
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L);
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2);
+select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'),
typeof(interval '23 days');
+select typeof(x'ABCD'), typeof('SPARK');
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1,
'b', 'spark'));
+
+-- Spark-32793: Rewrite AssertTrue with RaiseError
+SELECT assert_true(true), assert_true(boolean(1));
+SELECT assert_true(false);
+SELECT assert_true(boolean(0));
+SELECT assert_true(null);
+SELECT assert_true(boolean(null));
+SELECT assert_true(false, 'custom error message');
+
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v);
+SELECT raise_error('error message');
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc;
diff --git
a/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out
b/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out
new file mode 100644
index 000000000..d6d1289a5
--- /dev/null
+++
b/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out
@@ -0,0 +1,134 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+select typeof(null)
+-- !query schema
+struct<typeof(NULL):string>
+-- !query output
+void
+
+
+-- !query
+select typeof(true)
+-- !query schema
+struct<typeof(true):string>
+-- !query output
+boolean
+
+
+-- !query
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L)
+-- !query schema
+struct<typeof(1):string,typeof(1):string,typeof(1):string,typeof(1):string>
+-- !query output
+tinyint smallint int bigint
+
+
+-- !query
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2)
+-- !query schema
+struct<typeof(CAST(1.0 AS FLOAT)):string,typeof(1.0):string,typeof(1.2):string>
+-- !query output
+float double decimal(2,1)
+
+
+-- !query
+select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'),
typeof(interval '23 days')
+-- !query schema
+struct<typeof(DATE '1986-05-23'):string,typeof(TIMESTAMP '1986-05-23
00:00:00'):string,typeof(INTERVAL '23' DAY):string>
+-- !query output
+date timestamp interval day
+
+
+-- !query
+select typeof(x'ABCD'), typeof('SPARK')
+-- !query schema
+struct<typeof(X'ABCD'):string,typeof(SPARK):string>
+-- !query output
+binary string
+
+
+-- !query
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1,
'b', 'spark'))
+-- !query schema
+struct<typeof(array(1, 2)):string,typeof(map(1,
2)):string,typeof(named_struct(a, 1, b, spark)):string>
+-- !query output
+array<int> map<int,int> struct<a:int,b:string>
+
+
+-- !query
+SELECT assert_true(true), assert_true(boolean(1))
+-- !query schema
+struct<assert_true(true, 'true' is not true!):void,assert_true(1, 'cast(1 as
boolean)' is not true!):void>
+-- !query output
+NULL NULL
+
+
+-- !query
+SELECT assert_true(false)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'false' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(0))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(0 as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'null' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(null))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(null as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(false, 'custom error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+custom error message
+
+
+-- !query
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT raise_error('error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+error message
+
+
+-- !query
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+too big: 8
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index e54aca34e..9716a7c14 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -969,6 +969,11 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenFileSourceCharVarcharTestSuite]
enableSuite[GlutenDSV2CharVarcharTestSuite]
enableSuite[GlutenColumnExpressionSuite]
+ // Velox raise_error('errMsg') throws a velox_user_error exception with
the message 'errMsg'.
+ // The final caught Spark exception's getCause().getMessage() contains
'errMsg' but does not
+ // equal 'errMsg' exactly. The following two tests will be skipped and
overridden in Gluten.
+ .exclude("raise_error")
+ .exclude("assert_true")
enableSuite[GlutenComplexTypeSuite]
enableSuite[GlutenConfigBehaviorSuite]
// Will be fixed by cleaning up ColumnarShuffleExchangeExec.
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
index da22e60f9..437cef292 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
@@ -16,11 +16,60 @@
*/
package org.apache.spark.sql
+import org.apache.spark.SparkException
import org.apache.spark.sql.execution.ProjectExec
-import org.apache.spark.sql.functions.{expr, input_file_name}
+import org.apache.spark.sql.functions.{assert_true, expr, input_file_name,
lit, raise_error}
class GlutenColumnExpressionSuite extends ColumnExpressionSuite with
GlutenSQLTestsTrait {
import testImplicits._
+  // Gluten replacement for the vanilla "raise_error" test. Only a substring match is made on the
+  // message because the natively raised error contains the text without being equal to it.
+  testGluten("raise_error") {
+    val input = Seq("hello").toDF("a")
+
+    // A null message must still fail the query with a runtime error.
+    val nullMessageFailure = intercept[SparkException] {
+      input.select(raise_error(lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(nullMessageFailure.getCause.isInstanceOf[RuntimeException])
+
+    // A message taken from a column must show up in the cause's message.
+    val columnMessageFailure = intercept[SparkException] {
+      input.select(raise_error($"a")).collect()
+    }
+    val columnMessageCause = columnMessageFailure.getCause
+    assert(columnMessageCause.isInstanceOf[RuntimeException])
+    assert(columnMessageCause.getMessage.contains("hello"))
+  }
+
+  // Gluten replacement for the vanilla "assert_true" test. Failure messages are checked with a
+  // substring match rather than equality.
+  testGluten("assert_true") {
+    val passing = Seq(Row(null))
+
+    // assert_true(condition, errMsgCol) over a plain boolean column
+    val flags = Seq(true, false).toDF("cond")
+    checkAnswer(flags.filter("cond = true").select(assert_true($"cond")), passing)
+    val nullMessageFailure = intercept[SparkException] {
+      flags.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(nullMessageFailure.getCause.isInstanceOf[RuntimeException])
+
+    // assert_true(condition, errMsgCol) where the condition itself may be null
+    val nullableFlags = Seq(("first row", None), ("second row", Some(true))).toDF("n", "cond")
+    checkAnswer(
+      nullableFlags.filter("cond = true").select(assert_true($"cond", $"cond")),
+      passing)
+    val nullConditionFailure = intercept[SparkException] {
+      nullableFlags.select(assert_true($"cond", $"n")).collect()
+    }
+    val nullConditionCause = nullConditionFailure.getCause
+    assert(nullConditionCause.isInstanceOf[RuntimeException])
+    assert(nullConditionCause.getMessage.contains("first row"))
+
+    // assert_true(condition) with the default error message
+    val pair = Seq((0, 1)).toDF("a", "b")
+    checkAnswer(pair.select(assert_true($"a" < $"b")), passing)
+    val defaultMessageFailure = intercept[SparkException] {
+      pair.select(assert_true($"a" > $"b")).collect()
+    }
+    val defaultMessageCause = defaultMessageFailure.getCause
+    assert(defaultMessageCause.isInstanceOf[RuntimeException])
+    assert(defaultMessageCause.getMessage.contains("'('a > 'b)' is not true!"))
+  }
+
testGluten(
"input_file_name, input_file_block_start and input_file_block_length " +
"should fall back if scan falls back") {
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index b1f3945bf..8a6f5f32f 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -17,9 +17,12 @@
package org.apache.spark.sql
import org.apache.gluten.GlutenConfig
+import org.apache.gluten.exception.GlutenException
import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils,
SystemParameters}
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkException, SparkThrowable}
+import org.apache.spark.ErrorMessageFormat.MINIMAL
+import org.apache.spark.SparkThrowableHelper.getMessage
import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -40,6 +43,7 @@ import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import scala.sys.process.{Process, ProcessLogger}
import scala.util.Try
+import scala.util.control.NonFatal
/**
* End-to-end test cases for SQL queries.
@@ -783,4 +787,50 @@ class GlutenSQLQueryTestSuite
super.afterAll()
}
}
+
+  /**
+   * Runs `result` and turns any non-fatal failure into a `(schema, output)` pair, so that a
+   * failing query can be compared against the expected output like a regular result.
+   *
+   * A failure is rendered with `emptySchema` and an output of `Seq(exception class name,
+   * message)`. For a [[SparkException]] with a cause, the cause is rendered instead of the
+   * wrapper. For a [[GlutenException]] cause, only the text after the "Reason: " marker is kept
+   * when the marker is present; otherwise the full message is used.
+   *
+   * @param result
+   *   a by-name computation that returns a pair of schema and output
+   * @return
+   *   the pair produced by `result`, or `emptySchema` together with the rendered failure
+   */
+  override protected def handleExceptions(
+      result: => (String, Seq[String])): (String, Seq[String]) = {
+    val format = MINIMAL
+    try {
+      result
+    } catch {
+      case e: SparkThrowable with Throwable if e.getErrorClass != null =>
+        (emptySchema, Seq(e.getClass.getName, getMessage(e, format)))
+      case a: AnalysisException =>
+        // Do not output the logical plan tree which contains expression IDs.
+        // Also implement a crude way of masking expression IDs in the error message
+        // by rewriting every "#<digits>" to the generic "#x".
+        (emptySchema, Seq(a.getClass.getName, a.getSimpleMessage.replaceAll("#\\d+", "#x")))
+      case s: SparkException if s.getCause != null =>
+        // For a runtime exception, it is hard to match because its message contains
+        // information of stage, task ID, etc.
+        // To make result matching simpler, here we match the cause of the exception if it exists.
+        s.getCause match {
+          case e: SparkThrowable with Throwable if e.getErrorClass != null =>
+            (emptySchema, Seq(e.getClass.getName, getMessage(e, format)))
+          case e: GlutenException =>
+            // Keep only the text following "Reason: " on that line. Option(...) guards against
+            // a null message, and a message without the marker falls back to the full text
+            // instead of producing an empty output that would hide the failure.
+            val reasonPattern = "Reason: (.*)".r
+            val reason = Option(e.getMessage)
+              .flatMap(msg => reasonPattern.findFirstMatchIn(msg))
+              .map(_.group(1))
+            (emptySchema, Seq(e.getClass.getName, reason.getOrElse(e.getMessage)))
+
+          case cause =>
+            (emptySchema, Seq(cause.getClass.getName, cause.getMessage))
+        }
+      case NonFatal(e) =>
+        // If there is an exception, put the exception class followed by the message.
+        (emptySchema, Seq(e.getClass.getName, e.getMessage))
+    }
+  }
}
diff --git
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index 0b31ec346..41bc86749 100644
---
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -316,6 +316,7 @@ object ExpressionNames {
final val MONOTONICALLY_INCREASING_ID = "monotonically_increasing_id"
final val WIDTH_BUCKET = "width_bucket"
final val REPLICATE_ROWS = "replicaterows"
+ final val RAISE_ERROR = "raise_error"
// Directly use child expression transformer
final val KNOWN_NULLABLE = "known_nullable"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]