(incubator-gluten) branch main updated: [VL] Support Spark assert_true function (#6329)

philo Thu, 11 Jul 2024 06:19:47 -0700

This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new 6f189c7cb [VL] Support Spark assert_true function (#6329)
6f189c7cb is described below

commit 6f189c7cbae1a7d6cf80ff7f0a96afb5299a804f
Author: 高阳阳 <[email protected]>
AuthorDate: Thu Jul 11 21:19:37 2024 +0800

    [VL] Support Spark assert_true function (#6329)
---
 .../org/apache/gluten/utils/CHExpressionUtil.scala |   5 +-
 .../backendsapi/velox/VeloxSparkPlanExecApi.scala  |   3 -
 .../execution/ScalarFunctionsValidateSuite.scala   |  14 +++
 .../gluten/expression/ExpressionMappings.scala     |   3 +
 .../resources/sql-tests/inputs/misc-functions.sql  |  22 ++++
 .../sql-tests/results/misc-functions.sql.out       | 137 +++++++++++++++++++++
 .../gluten/utils/velox/VeloxTestSettings.scala     |   5 +
 .../spark/sql/GlutenColumnExpressionSuite.scala    |  51 +++++++-
 .../apache/spark/sql/GlutenSQLQueryTestSuite.scala |  45 ++++++-
 .../resources/sql-tests/inputs/misc-functions.sql  |  22 ++++
 .../sql-tests/results/misc-functions.sql.out       | 137 +++++++++++++++++++++
 .../gluten/utils/velox/VeloxTestSettings.scala     |   5 +
 .../spark/sql/GlutenColumnExpressionSuite.scala    |  51 +++++++-
 .../apache/spark/sql/GlutenSQLQueryTestSuite.scala |  45 ++++++-
 .../resources/sql-tests/inputs/misc-functions.sql  |  22 ++++
 .../sql-tests/results/misc-functions.sql.out       | 134 ++++++++++++++++++++
 .../gluten/utils/velox/VeloxTestSettings.scala     |   5 +
 .../spark/sql/GlutenColumnExpressionSuite.scala    |  51 +++++++-
 .../apache/spark/sql/GlutenSQLQueryTestSuite.scala |  52 +++++++-
 .../resources/sql-tests/inputs/misc-functions.sql  |  22 ++++
 .../sql-tests/results/misc-functions.sql.out       | 134 ++++++++++++++++++++
 .../gluten/utils/velox/VeloxTestSettings.scala     |   5 +
 .../spark/sql/GlutenColumnExpressionSuite.scala    |  51 +++++++-
 .../apache/spark/sql/GlutenSQLQueryTestSuite.scala |  52 +++++++-
 .../apache/gluten/expression/ExpressionNames.scala |   1 +
 25 files changed, 1062 insertions(+), 12 deletions(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
 
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index ac03a7a5b..d65de1cea 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -215,6 +215,9 @@ object CHExpressionUtil {
     UNIX_MICROS -> DefaultValidator(),
     TIMESTAMP_MILLIS -> DefaultValidator(),
     TIMESTAMP_MICROS -> DefaultValidator(),
-    STACK -> DefaultValidator()
+    STACK -> DefaultValidator(),
+    TRANSFORM_KEYS -> DefaultValidator(),
+    TRANSFORM_VALUES -> DefaultValidator(),
+    RAISE_ERROR -> DefaultValidator()
   )
 }
diff --git 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index e13ebd971..2b9d01738 100644
--- 
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -22,7 +22,6 @@ import org.apache.gluten.datasource.ArrowConvertorRule
 import org.apache.gluten.exception.GlutenNotSupportException
 import org.apache.gluten.execution._
 import org.apache.gluten.expression._
-import org.apache.gluten.expression.ExpressionNames.{TRANSFORM_KEYS, 
TRANSFORM_VALUES}
 import org.apache.gluten.expression.aggregate.{HLLAdapter, 
VeloxBloomFilterAggregate, VeloxCollectList, VeloxCollectSet}
 import org.apache.gluten.extension._
 import org.apache.gluten.extension.columnar.FallbackTags
@@ -835,8 +834,6 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
       Sig[VeloxCollectSet](ExpressionNames.COLLECT_SET),
       Sig[VeloxBloomFilterMightContain](ExpressionNames.MIGHT_CONTAIN),
       Sig[VeloxBloomFilterAggregate](ExpressionNames.BLOOM_FILTER_AGG),
-      Sig[TransformKeys](TRANSFORM_KEYS),
-      Sig[TransformValues](TRANSFORM_VALUES),
       // For test purpose.
       Sig[VeloxDummyExpression](VeloxDummyExpression.VELOX_DUMMY_EXPRESSION)
     )
diff --git 
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
 
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index e81c956fe..fc3bf320e 100644
--- 
a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ 
b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -16,6 +16,7 @@
  */
 package org.apache.gluten.execution
 
+import org.apache.spark.SparkException
 import org.apache.spark.sql.execution.ProjectExec
 import org.apache.spark.sql.types._
 
@@ -663,6 +664,19 @@ class ScalarFunctionsValidateSuite extends 
FunctionsValidateTest {
     }
   }
 
+  test("Test raise_error, assert_true function") {
+    runQueryAndCompare("""SELECT assert_true(l_orderkey >= 1), l_orderkey
+                         | from lineitem limit 100""".stripMargin) {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+    val e = intercept[SparkException] {
+      sql("""SELECT assert_true(l_orderkey >= 100), l_orderkey from
+            | lineitem limit 100""".stripMargin).collect()
+    }
+    assert(e.getCause.isInstanceOf[RuntimeException])
+    assert(e.getMessage.contains("l_orderkey"))
+  }
+
   test("Test E function") {
     runQueryAndCompare("""SELECT E() from lineitem limit 100""".stripMargin) {
       checkGlutenOperatorMatch[ProjectExecTransformer]
diff --git 
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
 
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index 51e78a97e..77e85b354 100644
--- 
a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++ 
b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -261,6 +261,8 @@ object ExpressionMappings {
     Sig[MapEntries](MAP_ENTRIES),
     Sig[MapZipWith](MAP_ZIP_WITH),
     Sig[StringToMap](STR_TO_MAP),
+    Sig[TransformKeys](TRANSFORM_KEYS),
+    Sig[TransformValues](TRANSFORM_VALUES),
     // Struct functions
     Sig[GetStructField](GET_STRUCT_FIELD),
     Sig[CreateNamedStruct](NAMED_STRUCT),
@@ -284,6 +286,7 @@ object ExpressionMappings {
     Sig[SparkPartitionID](SPARK_PARTITION_ID),
     Sig[WidthBucket](WIDTH_BUCKET),
     Sig[ReplicateRows](REPLICATE_ROWS),
+    Sig[RaiseError](RAISE_ERROR),
     // Decimal
     Sig[UnscaledValue](UNSCALED_VALUE),
     // Generator function
diff --git 
a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql 
b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql
new file mode 100644
index 000000000..907ff3300
--- /dev/null
+++ b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql
@@ -0,0 +1,22 @@
+-- test for misc functions
+
+-- typeof
+select typeof(null);
+select typeof(true);
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L);
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2);
+select typeof(date '1986-05-23'),  typeof(timestamp '1986-05-23'), 
typeof(interval '23 days');
+select typeof(x'ABCD'), typeof('SPARK');
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 
'b', 'spark'));
+
+-- Spark-32793: Rewrite AssertTrue with RaiseError
+SELECT assert_true(true), assert_true(boolean(1));
+SELECT assert_true(false);
+SELECT assert_true(boolean(0));
+SELECT assert_true(null);
+SELECT assert_true(boolean(null));
+SELECT assert_true(false, 'custom error message');
+
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v);
+SELECT raise_error('error message');
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc;
diff --git 
a/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out 
b/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out
new file mode 100644
index 000000000..6985233c3
--- /dev/null
+++ 
b/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out
@@ -0,0 +1,137 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 16
+
+
+-- !query
+select typeof(null)
+-- !query schema
+struct<typeof(NULL):string>
+-- !query output
+void
+
+
+-- !query
+select typeof(true)
+-- !query schema
+struct<typeof(true):string>
+-- !query output
+boolean
+
+
+-- !query
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L)
+-- !query schema
+struct<typeof(1):string,typeof(1):string,typeof(1):string,typeof(1):string>
+-- !query output
+tinyint        smallint        int     bigint
+
+
+-- !query
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2)
+-- !query schema
+struct<typeof(CAST(1.0 AS FLOAT)):string,typeof(1.0):string,typeof(1.2):string>
+-- !query output
+float  double  decimal(2,1)
+
+
+-- !query
+select typeof(date '1986-05-23'),  typeof(timestamp '1986-05-23'), 
typeof(interval '23 days')
+-- !query schema
+struct<typeof(DATE '1986-05-23'):string,typeof(TIMESTAMP '1986-05-23 
00:00:00'):string,typeof(INTERVAL '23' DAY):string>
+-- !query output
+date   timestamp       interval day
+
+
+-- !query
+select typeof(x'ABCD'), typeof('SPARK')
+-- !query schema
+struct<typeof(X'ABCD'):string,typeof(SPARK):string>
+-- !query output
+binary string
+
+
+-- !query
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 
'b', 'spark'))
+-- !query schema
+struct<typeof(array(1, 2)):string,typeof(map(1, 
2)):string,typeof(named_struct(a, 1, b, spark)):string>
+-- !query output
+array<int>     map<int,int>    struct<a:int,b:string>
+
+
+-- !query
+SELECT assert_true(true), assert_true(boolean(1))
+-- !query schema
+struct<assert_true(true, 'true' is not true!):void,assert_true(1, 'cast(1 as 
boolean)' is not true!):void>
+-- !query output
+NULL   NULL
+
+
+-- !query
+SELECT assert_true(false)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'false' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(0))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(0 as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'null' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(null))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(null as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(false, 'custom error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+custom error message
+
+
+-- !query
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT raise_error('error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+error message
+
+
+-- !query
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+too big: 8
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index a17f72de3..d5e8df638 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -296,6 +296,11 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("sliding range between with aggregation")
     .exclude("store and retrieve column stats in different time zones")
   enableSuite[GlutenColumnExpressionSuite]
+    // Velox raise_error('errMsg') throws a velox_user_error exception with 
the message 'errMsg'.
+    // The final caught Spark exception's getCause().getMessage() contains 
'errMsg' but does not
+    // equal 'errMsg' exactly. The following two tests will be skipped and 
overridden in Gluten.
+    .exclude("raise_error")
+    .exclude("assert_true")
   enableSuite[GlutenDataFrameImplicitsSuite]
   enableSuite[GlutenGeneratorFunctionSuite]
   enableSuite[GlutenDataFrameTimeWindowingSuite]
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
index da22e60f9..437cef292 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
@@ -16,11 +16,60 @@
  */
 package org.apache.spark.sql
 
+import org.apache.spark.SparkException
 import org.apache.spark.sql.execution.ProjectExec
-import org.apache.spark.sql.functions.{expr, input_file_name}
+import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, 
lit, raise_error}
 
 class GlutenColumnExpressionSuite extends ColumnExpressionSuite with 
GlutenSQLTestsTrait {
   import testImplicits._
+  testGluten("raise_error") {
+    val strDf = Seq(("hello")).toDF("a")
+
+    val e1 = intercept[SparkException] {
+      strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(e1.getCause.isInstanceOf[RuntimeException])
+
+    val e2 = intercept[SparkException] {
+      strDf.select(raise_error($"a")).collect()
+    }
+    assert(e2.getCause.isInstanceOf[RuntimeException])
+    assert(e2.getCause.getMessage contains "hello")
+  }
+
+  testGluten("assert_true") {
+    // assert_true(condition, errMsgCol)
+    val booleanDf = Seq((true), (false)).toDF("cond")
+    checkAnswer(
+      booleanDf.filter("cond = true").select(assert_true($"cond")),
+      Row(null) :: Nil
+    )
+    val e1 = intercept[SparkException] {
+      booleanDf.select(assert_true($"cond", 
lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(e1.getCause.isInstanceOf[RuntimeException])
+
+    val nullDf = Seq(("first row", None), ("second row", 
Some(true))).toDF("n", "cond")
+    checkAnswer(
+      nullDf.filter("cond = true").select(assert_true($"cond", $"cond")),
+      Row(null) :: Nil
+    )
+    val e2 = intercept[SparkException] {
+      nullDf.select(assert_true($"cond", $"n")).collect()
+    }
+    assert(e2.getCause.isInstanceOf[RuntimeException])
+    assert(e2.getCause.getMessage contains "first row")
+
+    // assert_true(condition)
+    val intDf = Seq((0, 1)).toDF("a", "b")
+    checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil)
+    val e3 = intercept[SparkException] {
+      intDf.select(assert_true($"a" > $"b")).collect()
+    }
+    assert(e3.getCause.isInstanceOf[RuntimeException])
+    assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!")
+  }
+
   testGluten(
     "input_file_name, input_file_block_start and input_file_block_length " +
       "should fall back if scan falls back") {
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 4b75ce13c..4fbd89bda 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -17,9 +17,10 @@
 package org.apache.spark.sql
 
 import org.apache.gluten.GlutenConfig
+import org.apache.gluten.exception.GlutenException
 import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, 
SystemParameters}
 
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
 import org.apache.spark.sql.catalyst.plans.SQLHelper
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -39,6 +40,7 @@ import java.util.Locale
 import scala.collection.mutable.ArrayBuffer
 import scala.sys.process.{Process, ProcessLogger}
 import scala.util.Try
+import scala.util.control.NonFatal
 
 /**
  * End-to-end test cases for SQL queries.
@@ -761,4 +763,45 @@ class GlutenSQLQueryTestSuite
       super.afterAll()
     }
   }
+
+  /**
+   * This method handles exceptions that occurred during query execution, as they 
may need special care to
+   * become comparable to the expected output.
+   *
+   * @param result
+   *   a function that returns a pair of schema and output
+   */
+  override protected def handleExceptions(
+      result: => (String, Seq[String])): (String, Seq[String]) = {
+    try {
+      result
+    } catch {
+      case a: AnalysisException =>
+        // Do not output the logical plan tree which contains expression IDs.
+        // Also implement a crude way of masking expression IDs in the error 
message
+        // with a generic pattern "#x".
+        val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage
+        (emptySchema, Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x")))
+      case s: SparkException if s.getCause != null =>
+        // For a runtime exception, it is hard to match because its message 
contains
+        // information of stage, task ID, etc.
+        // To make result matching simpler, here we match the cause of the 
exception if it exists.
+        s.getCause match {
+          case e: GlutenException =>
+            val reasonPattern = "Reason: (.*)".r
+            val reason = 
reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1))
+
+            reason match {
+              case Some(r) =>
+                (emptySchema, Seq(e.getClass.getName, r))
+              case None => (emptySchema, Seq())
+            }
+          case cause =>
+            (emptySchema, Seq(cause.getClass.getName, cause.getMessage))
+        }
+      case NonFatal(e) =>
+        // If there is an exception, put the exception class followed by the 
message.
+        (emptySchema, Seq(e.getClass.getName, e.getMessage))
+    }
+  }
 }
diff --git 
a/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql 
b/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql
new file mode 100644
index 000000000..907ff3300
--- /dev/null
+++ b/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql
@@ -0,0 +1,22 @@
+-- test for misc functions
+
+-- typeof
+select typeof(null);
+select typeof(true);
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L);
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2);
+select typeof(date '1986-05-23'),  typeof(timestamp '1986-05-23'), 
typeof(interval '23 days');
+select typeof(x'ABCD'), typeof('SPARK');
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 
'b', 'spark'));
+
+-- Spark-32793: Rewrite AssertTrue with RaiseError
+SELECT assert_true(true), assert_true(boolean(1));
+SELECT assert_true(false);
+SELECT assert_true(boolean(0));
+SELECT assert_true(null);
+SELECT assert_true(boolean(null));
+SELECT assert_true(false, 'custom error message');
+
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v);
+SELECT raise_error('error message');
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc;
diff --git 
a/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out 
b/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out
new file mode 100644
index 000000000..6985233c3
--- /dev/null
+++ 
b/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out
@@ -0,0 +1,137 @@
+-- Automatically generated by SQLQueryTestSuite
+-- Number of queries: 16
+
+
+-- !query
+select typeof(null)
+-- !query schema
+struct<typeof(NULL):string>
+-- !query output
+void
+
+
+-- !query
+select typeof(true)
+-- !query schema
+struct<typeof(true):string>
+-- !query output
+boolean
+
+
+-- !query
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L)
+-- !query schema
+struct<typeof(1):string,typeof(1):string,typeof(1):string,typeof(1):string>
+-- !query output
+tinyint        smallint        int     bigint
+
+
+-- !query
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2)
+-- !query schema
+struct<typeof(CAST(1.0 AS FLOAT)):string,typeof(1.0):string,typeof(1.2):string>
+-- !query output
+float  double  decimal(2,1)
+
+
+-- !query
+select typeof(date '1986-05-23'),  typeof(timestamp '1986-05-23'), 
typeof(interval '23 days')
+-- !query schema
+struct<typeof(DATE '1986-05-23'):string,typeof(TIMESTAMP '1986-05-23 
00:00:00'):string,typeof(INTERVAL '23' DAY):string>
+-- !query output
+date   timestamp       interval day
+
+
+-- !query
+select typeof(x'ABCD'), typeof('SPARK')
+-- !query schema
+struct<typeof(X'ABCD'):string,typeof(SPARK):string>
+-- !query output
+binary string
+
+
+-- !query
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 
'b', 'spark'))
+-- !query schema
+struct<typeof(array(1, 2)):string,typeof(map(1, 
2)):string,typeof(named_struct(a, 1, b, spark)):string>
+-- !query output
+array<int>     map<int,int>    struct<a:int,b:string>
+
+
+-- !query
+SELECT assert_true(true), assert_true(boolean(1))
+-- !query schema
+struct<assert_true(true, 'true' is not true!):void,assert_true(1, 'cast(1 as 
boolean)' is not true!):void>
+-- !query output
+NULL   NULL
+
+
+-- !query
+SELECT assert_true(false)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'false' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(0))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(0 as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'null' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(null))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(null as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(false, 'custom error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+custom error message
+
+
+-- !query
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT raise_error('error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+error message
+
+
+-- !query
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+too big: 8
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index ae3e7c7b8..fcc2bd343 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -950,6 +950,11 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenFileSourceCharVarcharTestSuite]
   enableSuite[GlutenDSV2CharVarcharTestSuite]
   enableSuite[GlutenColumnExpressionSuite]
+    // Velox raise_error('errMsg') throws a velox_user_error exception with 
the message 'errMsg'.
+    // The final caught Spark exception's getCause().getMessage() contains 
'errMsg' but does not
+    // equal 'errMsg' exactly. The following two tests will be skipped and 
overridden in Gluten.
+    .exclude("raise_error")
+    .exclude("assert_true")
   enableSuite[GlutenComplexTypeSuite]
   enableSuite[GlutenConfigBehaviorSuite]
     // Will be fixed by cleaning up ColumnarShuffleExchangeExec.
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
index da22e60f9..437cef292 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
@@ -16,11 +16,60 @@
  */
 package org.apache.spark.sql
 
+import org.apache.spark.SparkException
 import org.apache.spark.sql.execution.ProjectExec
-import org.apache.spark.sql.functions.{expr, input_file_name}
+import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, 
lit, raise_error}
 
 class GlutenColumnExpressionSuite extends ColumnExpressionSuite with 
GlutenSQLTestsTrait {
   import testImplicits._
+  testGluten("raise_error") {
+    val strDf = Seq(("hello")).toDF("a")
+
+    val e1 = intercept[SparkException] {
+      strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(e1.getCause.isInstanceOf[RuntimeException])
+
+    val e2 = intercept[SparkException] {
+      strDf.select(raise_error($"a")).collect()
+    }
+    assert(e2.getCause.isInstanceOf[RuntimeException])
+    assert(e2.getCause.getMessage contains "hello")
+  }
+
+  testGluten("assert_true") {
+    // assert_true(condition, errMsgCol)
+    val booleanDf = Seq((true), (false)).toDF("cond")
+    checkAnswer(
+      booleanDf.filter("cond = true").select(assert_true($"cond")),
+      Row(null) :: Nil
+    )
+    val e1 = intercept[SparkException] {
+      booleanDf.select(assert_true($"cond", 
lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(e1.getCause.isInstanceOf[RuntimeException])
+
+    val nullDf = Seq(("first row", None), ("second row", 
Some(true))).toDF("n", "cond")
+    checkAnswer(
+      nullDf.filter("cond = true").select(assert_true($"cond", $"cond")),
+      Row(null) :: Nil
+    )
+    val e2 = intercept[SparkException] {
+      nullDf.select(assert_true($"cond", $"n")).collect()
+    }
+    assert(e2.getCause.isInstanceOf[RuntimeException])
+    assert(e2.getCause.getMessage contains "first row")
+
+    // assert_true(condition)
+    val intDf = Seq((0, 1)).toDF("a", "b")
+    checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil)
+    val e3 = intercept[SparkException] {
+      intDf.select(assert_true($"a" > $"b")).collect()
+    }
+    assert(e3.getCause.isInstanceOf[RuntimeException])
+    assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!")
+  }
+
   testGluten(
     "input_file_name, input_file_block_start and input_file_block_length " +
       "should fall back if scan falls back") {
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 4536aa540..6e2a9efa8 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -17,9 +17,10 @@
 package org.apache.spark.sql
 
 import org.apache.gluten.GlutenConfig
+import org.apache.gluten.exception.GlutenException
 import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, 
SystemParameters}
 
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkException}
 import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
 import org.apache.spark.sql.catalyst.plans.SQLHelper
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -39,6 +40,7 @@ import java.util.Locale
 import scala.collection.mutable.ArrayBuffer
 import scala.sys.process.{Process, ProcessLogger}
 import scala.util.Try
+import scala.util.control.NonFatal
 
 /**
  * End-to-end test cases for SQL queries.
@@ -761,4 +763,45 @@ class GlutenSQLQueryTestSuite
       super.afterAll()
     }
   }
+
+  /**
+   * This method handles exceptions that occurred during query execution, as they 
may need special care to
+   * become comparable to the expected output.
+   *
+   * @param result
+   *   a function that returns a pair of schema and output
+   */
+  override protected def handleExceptions(
+      result: => (String, Seq[String])): (String, Seq[String]) = {
+    try {
+      result
+    } catch {
+      case a: AnalysisException =>
+        // Do not output the logical plan tree which contains expression IDs.
+        // Also implement a crude way of masking expression IDs in the error 
message
+        // with a generic pattern "#x".
+        val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage
+        (emptySchema, Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x")))
+      case s: SparkException if s.getCause != null =>
+        // For a runtime exception, it is hard to match because its message 
contains
+        // information of stage, task ID, etc.
+        // To make result matching simpler, here we match the cause of the 
exception if it exists.
+        val cause = s.getCause
+        cause match {
+          case e: GlutenException =>
+            val reasonPattern = "Reason: (.*)".r
+            val reason = 
reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1))
+
+            reason match {
+              case Some(r) =>
+                (emptySchema, Seq(e.getClass.getName, r))
+              case None => (emptySchema, Seq())
+            }
+          case _ => (emptySchema, Seq(cause.getClass.getName, 
cause.getMessage))
+        }
+      case NonFatal(e) =>
+        // If there is an exception, put the exception class followed by the 
message.
+        (emptySchema, Seq(e.getClass.getName, e.getMessage))
+    }
+  }
 }
diff --git 
a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql 
b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql
new file mode 100644
index 000000000..907ff3300
--- /dev/null
+++ b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql
@@ -0,0 +1,22 @@
+-- test for misc functions
+
+-- typeof
+select typeof(null);
+select typeof(true);
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L);
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2);
+select typeof(date '1986-05-23'),  typeof(timestamp '1986-05-23'), 
typeof(interval '23 days');
+select typeof(x'ABCD'), typeof('SPARK');
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 
'b', 'spark'));
+
+-- Spark-32793: Rewrite AssertTrue with RaiseError
+SELECT assert_true(true), assert_true(boolean(1));
+SELECT assert_true(false);
+SELECT assert_true(boolean(0));
+SELECT assert_true(null);
+SELECT assert_true(boolean(null));
+SELECT assert_true(false, 'custom error message');
+
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v);
+SELECT raise_error('error message');
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc;
diff --git 
a/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out 
b/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out
new file mode 100644
index 000000000..d6d1289a5
--- /dev/null
+++ 
b/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out
@@ -0,0 +1,134 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+select typeof(null)
+-- !query schema
+struct<typeof(NULL):string>
+-- !query output
+void
+
+
+-- !query
+select typeof(true)
+-- !query schema
+struct<typeof(true):string>
+-- !query output
+boolean
+
+
+-- !query
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L)
+-- !query schema
+struct<typeof(1):string,typeof(1):string,typeof(1):string,typeof(1):string>
+-- !query output
+tinyint        smallint        int     bigint
+
+
+-- !query
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2)
+-- !query schema
+struct<typeof(CAST(1.0 AS FLOAT)):string,typeof(1.0):string,typeof(1.2):string>
+-- !query output
+float  double  decimal(2,1)
+
+
+-- !query
+select typeof(date '1986-05-23'),  typeof(timestamp '1986-05-23'), 
typeof(interval '23 days')
+-- !query schema
+struct<typeof(DATE '1986-05-23'):string,typeof(TIMESTAMP '1986-05-23 
00:00:00'):string,typeof(INTERVAL '23' DAY):string>
+-- !query output
+date   timestamp       interval day
+
+
+-- !query
+select typeof(x'ABCD'), typeof('SPARK')
+-- !query schema
+struct<typeof(X'ABCD'):string,typeof(SPARK):string>
+-- !query output
+binary string
+
+
+-- !query
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 
'b', 'spark'))
+-- !query schema
+struct<typeof(array(1, 2)):string,typeof(map(1, 
2)):string,typeof(named_struct(a, 1, b, spark)):string>
+-- !query output
+array<int>     map<int,int>    struct<a:int,b:string>
+
+
+-- !query
+SELECT assert_true(true), assert_true(boolean(1))
+-- !query schema
+struct<assert_true(true, 'true' is not true!):void,assert_true(1, 'cast(1 as 
boolean)' is not true!):void>
+-- !query output
+NULL   NULL
+
+
+-- !query
+SELECT assert_true(false)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'false' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(0))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(0 as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'null' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(null))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(null as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(false, 'custom error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+custom error message
+
+
+-- !query
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT raise_error('error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+error message
+
+
+-- !query
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+too big: 8
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 0da19922f..57346f493 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -955,6 +955,11 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenFileSourceCharVarcharTestSuite]
   enableSuite[GlutenDSV2CharVarcharTestSuite]
   enableSuite[GlutenColumnExpressionSuite]
+    // Velox raise_error('errMsg') throws a velox_user_error exception with 
the message 'errMsg'.
+    // The final caught Spark exception's getCause().getMessage() contains 
'errMsg' but does not
+    // equal 'errMsg' exactly. The following two tests will be skipped and 
overridden in Gluten.
+    .exclude("raise_error")
+    .exclude("assert_true")
   enableSuite[GlutenComplexTypeSuite]
   enableSuite[GlutenConfigBehaviorSuite]
     // Will be fixed by cleaning up ColumnarShuffleExchangeExec.
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
index da22e60f9..437cef292 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
@@ -16,11 +16,60 @@
  */
 package org.apache.spark.sql
 
+import org.apache.spark.SparkException
 import org.apache.spark.sql.execution.ProjectExec
-import org.apache.spark.sql.functions.{expr, input_file_name}
+import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, 
lit, raise_error}
 
 class GlutenColumnExpressionSuite extends ColumnExpressionSuite with 
GlutenSQLTestsTrait {
   import testImplicits._
+  testGluten("raise_error") {
+    val strDf = Seq(("hello")).toDF("a")
+
+    val e1 = intercept[SparkException] {
+      strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(e1.getCause.isInstanceOf[RuntimeException])
+
+    val e2 = intercept[SparkException] {
+      strDf.select(raise_error($"a")).collect()
+    }
+    assert(e2.getCause.isInstanceOf[RuntimeException])
+    assert(e2.getCause.getMessage contains "hello")
+  }
+
+  testGluten("assert_true") {
+    // assert_true(condition, errMsgCol)
+    val booleanDf = Seq((true), (false)).toDF("cond")
+    checkAnswer(
+      booleanDf.filter("cond = true").select(assert_true($"cond")),
+      Row(null) :: Nil
+    )
+    val e1 = intercept[SparkException] {
+      booleanDf.select(assert_true($"cond", 
lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(e1.getCause.isInstanceOf[RuntimeException])
+
+    val nullDf = Seq(("first row", None), ("second row", 
Some(true))).toDF("n", "cond")
+    checkAnswer(
+      nullDf.filter("cond = true").select(assert_true($"cond", $"cond")),
+      Row(null) :: Nil
+    )
+    val e2 = intercept[SparkException] {
+      nullDf.select(assert_true($"cond", $"n")).collect()
+    }
+    assert(e2.getCause.isInstanceOf[RuntimeException])
+    assert(e2.getCause.getMessage contains "first row")
+
+    // assert_true(condition)
+    val intDf = Seq((0, 1)).toDF("a", "b")
+    checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil)
+    val e3 = intercept[SparkException] {
+      intDf.select(assert_true($"a" > $"b")).collect()
+    }
+    assert(e3.getCause.isInstanceOf[RuntimeException])
+    assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!")
+  }
+
   testGluten(
     "input_file_name, input_file_block_start and input_file_block_length " +
       "should fall back if scan falls back") {
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 0ea1f13ec..8a291990e 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -17,9 +17,12 @@
 package org.apache.spark.sql
 
 import org.apache.gluten.GlutenConfig
+import org.apache.gluten.exception.GlutenException
 import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, 
SystemParameters}
 
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkException, SparkThrowable}
+import org.apache.spark.ErrorMessageFormat.MINIMAL
+import org.apache.spark.SparkThrowableHelper.getMessage
 import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
 import org.apache.spark.sql.catalyst.plans.SQLHelper
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -40,6 +43,7 @@ import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
 import scala.sys.process.{Process, ProcessLogger}
 import scala.util.Try
+import scala.util.control.NonFatal
 
 /**
  * End-to-end test cases for SQL queries.
@@ -781,4 +785,50 @@ class GlutenSQLQueryTestSuite
       super.afterAll()
     }
   }
+
+  /**
+   * This method handles exceptions that occur during query execution, as they 
may need special care to
+   * become comparable to the expected output.
+   *
+   * @param result
+   *   a function that returns a pair of schema and output
+   */
+  override protected def handleExceptions(
+      result: => (String, Seq[String])): (String, Seq[String]) = {
+    val format = MINIMAL
+    try {
+      result
+    } catch {
+      case e: SparkThrowable with Throwable if e.getErrorClass != null =>
+        (emptySchema, Seq(e.getClass.getName, getMessage(e, format)))
+      case a: AnalysisException =>
+        // Do not output the logical plan tree which contains expression IDs.
+        // Also implement a crude way of masking expression IDs in the error 
message
+        // with a generic pattern "#x".
+        (emptySchema, Seq(a.getClass.getName, 
a.getSimpleMessage.replaceAll("#\\d+", "#x")))
+      case s: SparkException if s.getCause != null =>
+        // For a runtime exception, it is hard to match because its message 
contains
+        // information of stage, task ID, etc.
+        // To make result matching simpler, here we match the cause of the 
exception if it exists.
+        s.getCause match {
+          case e: SparkThrowable with Throwable if e.getErrorClass != null =>
+            (emptySchema, Seq(e.getClass.getName, getMessage(e, format)))
+          case e: GlutenException =>
+            val reasonPattern = "Reason: (.*)".r
+            val reason = 
reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1))
+
+            reason match {
+              case Some(r) =>
+                (emptySchema, Seq(e.getClass.getName, r))
+              case None => (emptySchema, Seq())
+            }
+
+          case cause =>
+            (emptySchema, Seq(cause.getClass.getName, cause.getMessage))
+        }
+      case NonFatal(e) =>
+        // If there is an exception, put the exception class followed by the 
message.
+        (emptySchema, Seq(e.getClass.getName, e.getMessage))
+    }
+  }
 }
diff --git 
a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql 
b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql
new file mode 100644
index 000000000..907ff3300
--- /dev/null
+++ b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql
@@ -0,0 +1,22 @@
+-- test for misc functions
+
+-- typeof
+select typeof(null);
+select typeof(true);
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L);
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2);
+select typeof(date '1986-05-23'),  typeof(timestamp '1986-05-23'), 
typeof(interval '23 days');
+select typeof(x'ABCD'), typeof('SPARK');
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 
'b', 'spark'));
+
+-- Spark-32793: Rewrite AssertTrue with RaiseError
+SELECT assert_true(true), assert_true(boolean(1));
+SELECT assert_true(false);
+SELECT assert_true(boolean(0));
+SELECT assert_true(null);
+SELECT assert_true(boolean(null));
+SELECT assert_true(false, 'custom error message');
+
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v);
+SELECT raise_error('error message');
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc;
diff --git 
a/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out 
b/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out
new file mode 100644
index 000000000..d6d1289a5
--- /dev/null
+++ 
b/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out
@@ -0,0 +1,134 @@
+-- Automatically generated by SQLQueryTestSuite
+-- !query
+select typeof(null)
+-- !query schema
+struct<typeof(NULL):string>
+-- !query output
+void
+
+
+-- !query
+select typeof(true)
+-- !query schema
+struct<typeof(true):string>
+-- !query output
+boolean
+
+
+-- !query
+select typeof(1Y), typeof(1S), typeof(1), typeof(1L)
+-- !query schema
+struct<typeof(1):string,typeof(1):string,typeof(1):string,typeof(1):string>
+-- !query output
+tinyint        smallint        int     bigint
+
+
+-- !query
+select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2)
+-- !query schema
+struct<typeof(CAST(1.0 AS FLOAT)):string,typeof(1.0):string,typeof(1.2):string>
+-- !query output
+float  double  decimal(2,1)
+
+
+-- !query
+select typeof(date '1986-05-23'),  typeof(timestamp '1986-05-23'), 
typeof(interval '23 days')
+-- !query schema
+struct<typeof(DATE '1986-05-23'):string,typeof(TIMESTAMP '1986-05-23 
00:00:00'):string,typeof(INTERVAL '23' DAY):string>
+-- !query output
+date   timestamp       interval day
+
+
+-- !query
+select typeof(x'ABCD'), typeof('SPARK')
+-- !query schema
+struct<typeof(X'ABCD'):string,typeof(SPARK):string>
+-- !query output
+binary string
+
+
+-- !query
+select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 
'b', 'spark'))
+-- !query schema
+struct<typeof(array(1, 2)):string,typeof(map(1, 
2)):string,typeof(named_struct(a, 1, b, spark)):string>
+-- !query output
+array<int>     map<int,int>    struct<a:int,b:string>
+
+
+-- !query
+SELECT assert_true(true), assert_true(boolean(1))
+-- !query schema
+struct<assert_true(true, 'true' is not true!):void,assert_true(1, 'cast(1 as 
boolean)' is not true!):void>
+-- !query output
+NULL   NULL
+
+
+-- !query
+SELECT assert_true(false)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'false' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(0))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(0 as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(null)
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'null' is not true!
+
+
+-- !query
+SELECT assert_true(boolean(null))
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+'cast(null as boolean)' is not true!
+
+
+-- !query
+SELECT assert_true(false, 'custom error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+custom error message
+
+
+-- !query
+CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v)
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+SELECT raise_error('error message')
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+error message
+
+
+-- !query
+SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc
+-- !query schema
+struct<>
+-- !query output
+org.apache.gluten.exception.GlutenException
+too big: 8
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index e54aca34e..9716a7c14 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -969,6 +969,11 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenFileSourceCharVarcharTestSuite]
   enableSuite[GlutenDSV2CharVarcharTestSuite]
   enableSuite[GlutenColumnExpressionSuite]
+    // Velox raise_error('errMsg') throws a velox_user_error exception with 
the message 'errMsg'.
+    // The final caught Spark exception's getCause().getMessage() contains 
'errMsg' but does not
+    // equal 'errMsg' exactly. The following two tests will be skipped and 
overridden in Gluten.
+    .exclude("raise_error")
+    .exclude("assert_true")
   enableSuite[GlutenComplexTypeSuite]
   enableSuite[GlutenConfigBehaviorSuite]
     // Will be fixed by cleaning up ColumnarShuffleExchangeExec.
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
index da22e60f9..437cef292 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala
@@ -16,11 +16,60 @@
  */
 package org.apache.spark.sql
 
+import org.apache.spark.SparkException
 import org.apache.spark.sql.execution.ProjectExec
-import org.apache.spark.sql.functions.{expr, input_file_name}
+import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, 
lit, raise_error}
 
 class GlutenColumnExpressionSuite extends ColumnExpressionSuite with 
GlutenSQLTestsTrait {
   import testImplicits._
+  testGluten("raise_error") {
+    val strDf = Seq(("hello")).toDF("a")
+
+    val e1 = intercept[SparkException] {
+      strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(e1.getCause.isInstanceOf[RuntimeException])
+
+    val e2 = intercept[SparkException] {
+      strDf.select(raise_error($"a")).collect()
+    }
+    assert(e2.getCause.isInstanceOf[RuntimeException])
+    assert(e2.getCause.getMessage contains "hello")
+  }
+
+  testGluten("assert_true") {
+    // assert_true(condition, errMsgCol)
+    val booleanDf = Seq((true), (false)).toDF("cond")
+    checkAnswer(
+      booleanDf.filter("cond = true").select(assert_true($"cond")),
+      Row(null) :: Nil
+    )
+    val e1 = intercept[SparkException] {
+      booleanDf.select(assert_true($"cond", 
lit(null.asInstanceOf[String]))).collect()
+    }
+    assert(e1.getCause.isInstanceOf[RuntimeException])
+
+    val nullDf = Seq(("first row", None), ("second row", 
Some(true))).toDF("n", "cond")
+    checkAnswer(
+      nullDf.filter("cond = true").select(assert_true($"cond", $"cond")),
+      Row(null) :: Nil
+    )
+    val e2 = intercept[SparkException] {
+      nullDf.select(assert_true($"cond", $"n")).collect()
+    }
+    assert(e2.getCause.isInstanceOf[RuntimeException])
+    assert(e2.getCause.getMessage contains "first row")
+
+    // assert_true(condition)
+    val intDf = Seq((0, 1)).toDF("a", "b")
+    checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil)
+    val e3 = intercept[SparkException] {
+      intDf.select(assert_true($"a" > $"b")).collect()
+    }
+    assert(e3.getCause.isInstanceOf[RuntimeException])
+    assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!")
+  }
+
   testGluten(
     "input_file_name, input_file_block_start and input_file_block_length " +
       "should fall back if scan falls back") {
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index b1f3945bf..8a6f5f32f 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -17,9 +17,12 @@
 package org.apache.spark.sql
 
 import org.apache.gluten.GlutenConfig
+import org.apache.gluten.exception.GlutenException
 import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, 
SystemParameters}
 
-import org.apache.spark.SparkConf
+import org.apache.spark.{SparkConf, SparkException, SparkThrowable}
+import org.apache.spark.ErrorMessageFormat.MINIMAL
+import org.apache.spark.SparkThrowableHelper.getMessage
 import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator
 import org.apache.spark.sql.catalyst.plans.SQLHelper
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -40,6 +43,7 @@ import scala.collection.mutable
 import scala.collection.mutable.ArrayBuffer
 import scala.sys.process.{Process, ProcessLogger}
 import scala.util.Try
+import scala.util.control.NonFatal
 
 /**
  * End-to-end test cases for SQL queries.
@@ -783,4 +787,50 @@ class GlutenSQLQueryTestSuite
       super.afterAll()
     }
   }
+
+  /**
+   * This method handles exceptions that occur during query execution, as they 
may need special care to
+   * become comparable to the expected output.
+   *
+   * @param result
+   *   a function that returns a pair of schema and output
+   */
+  override protected def handleExceptions(
+      result: => (String, Seq[String])): (String, Seq[String]) = {
+    val format = MINIMAL
+    try {
+      result
+    } catch {
+      case e: SparkThrowable with Throwable if e.getErrorClass != null =>
+        (emptySchema, Seq(e.getClass.getName, getMessage(e, format)))
+      case a: AnalysisException =>
+        // Do not output the logical plan tree which contains expression IDs.
+        // Also implement a crude way of masking expression IDs in the error 
message
+        // with a generic pattern "#x".
+        (emptySchema, Seq(a.getClass.getName, 
a.getSimpleMessage.replaceAll("#\\d+", "#x")))
+      case s: SparkException if s.getCause != null =>
+        // For a runtime exception, it is hard to match because its message 
contains
+        // information of stage, task ID, etc.
+        // To make result matching simpler, here we match the cause of the 
exception if it exists.
+        s.getCause match {
+          case e: SparkThrowable with Throwable if e.getErrorClass != null =>
+            (emptySchema, Seq(e.getClass.getName, getMessage(e, format)))
+          case e: GlutenException =>
+            val reasonPattern = "Reason: (.*)".r
+            val reason = 
reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1))
+
+            reason match {
+              case Some(r) =>
+                (emptySchema, Seq(e.getClass.getName, r))
+              case None => (emptySchema, Seq())
+            }
+
+          case cause =>
+            (emptySchema, Seq(cause.getClass.getName, cause.getMessage))
+        }
+      case NonFatal(e) =>
+        // If there is an exception, put the exception class followed by the 
message.
+        (emptySchema, Seq(e.getClass.getName, e.getMessage))
+    }
+  }
 }
diff --git 
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
 
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index 0b31ec346..41bc86749 100644
--- 
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ 
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -316,6 +316,7 @@ object ExpressionNames {
   final val MONOTONICALLY_INCREASING_ID = "monotonically_increasing_id"
   final val WIDTH_BUCKET = "width_bucket"
   final val REPLICATE_ROWS = "replicaterows"
+  final val RAISE_ERROR = "raise_error"
 
   // Directly use child expression transformer
   final val KNOWN_NULLABLE = "known_nullable"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [VL] Support Spark assert_true function (#6329)

Reply via email to