This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 36f0a8fc75 [GLUTEN-8215][VL] Support cast timestamp to date (#8212)
36f0a8fc75 is described below

commit 36f0a8fc75d08d409ffa538af8cc4781f97d15d0
Author: Mingliang Zhu <[email protected]>
AuthorDate: Tue Dec 17 14:32:24 2024 +0800

    [GLUTEN-8215][VL] Support cast timestamp to date (#8212)
---
 .../gluten/execution/MiscOperatorSuite.scala       |  7 ++
 .../substrait/SubstraitToVeloxPlanValidator.cc     | 11 ++-
 .../gluten/utils/velox/VeloxTestSettings.scala     |  3 +
 .../spark/sql/GlutenDateFunctionsSuite.scala       | 89 ++++++++++++++++++++++
 .../gluten/utils/velox/VeloxTestSettings.scala     |  3 +
 .../spark/sql/GlutenDateFunctionsSuite.scala       | 89 ++++++++++++++++++++++
 .../gluten/utils/velox/VeloxTestSettings.scala     |  3 +
 .../spark/sql/GlutenDateFunctionsSuite.scala       | 89 ++++++++++++++++++++++
 .../sql/catalyst/expressions/GlutenCastSuite.scala | 15 ++--
 .../gluten/utils/velox/VeloxTestSettings.scala     |  3 +
 .../spark/sql/GlutenDateFunctionsSuite.scala       | 89 ++++++++++++++++++++++
 11 files changed, 388 insertions(+), 13 deletions(-)

diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
index 8063a5d122..989def88e7 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
@@ -1791,6 +1791,13 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
     assert(plan2.find(_.isInstanceOf[ProjectExecTransformer]).isDefined)
   }
 
+  test("cast timestamp to date") {
+    val query = "select cast(ts as date) from values (timestamp'2024-01-01 00:00:00') as tab(ts)"
+    runQueryAndCompare(query) {
+      checkGlutenOperatorMatch[ProjectExecTransformer]
+    }
+  }
+
   test("timestamp broadcast join") {
     spark.range(0, 5).createOrReplaceTempView("right")
     spark.sql("SELECT id, timestamp_micros(id) as ts from right").createOrReplaceTempView("left")
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 84dfe68e2d..996b3bdce0 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -299,10 +299,13 @@ bool SubstraitToVeloxPlanValidator::validateCast(
     case TypeKind::VARBINARY:
       LOG_VALIDATION_MSG("Invalid input type in casting: ARRAY/MAP/ROW/VARBINARY.");
       return false;
-    case TypeKind::TIMESTAMP: {
-      LOG_VALIDATION_MSG("Casting from TIMESTAMP is not supported or has incorrect result.");
-      return false;
-    }
+    case TypeKind::TIMESTAMP:
+      // Only support cast timestamp to date
+      if (!toType->isDate()) {
+        LOG_VALIDATION_MSG(
+            "Casting from TIMESTAMP to " + toType->toString() + " is not supported or has incorrect result.");
+        return false;
+      }
     default: {
     }
   }
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 15495270a1..2c6b882850 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -265,6 +265,9 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_timestamp")
     // Legacy mode is not supported, assuming this mode is not commonly used.
     .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+    // Legacy mode is not supported and velox getTimestamp function does not throw
+    // exception when format is "yyyy-dd-aa".
+    .exclude("function to_date")
   enableSuite[GlutenDataFrameFunctionsSuite]
     // blocked by Velox-5768
     .exclude("aggregate function - array for primitive type containing null")
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index 8d1f7320dd..5ddfe6fc1f 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -248,4 +248,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
         }
     }
   }
+
+  testGluten("function to_date") {
+    val d1 = Date.valueOf("2015-07-22")
+    val d2 = Date.valueOf("2015-07-01")
+    val d3 = Date.valueOf("2014-12-31")
+    val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+    val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val s1 = "2015-07-22 10:00:00"
+    val s2 = "2014-12-31"
+    val s3 = "2014-31-12"
+    val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+    checkAnswer(
+      df.select(to_date(col("t"))),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("d"))),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("s"))),
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+    checkAnswer(
+      df.selectExpr("to_date(t)"),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.selectExpr("to_date(d)"),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.selectExpr("to_date(s)"),
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+    // now with format
+    checkAnswer(
+      df.select(to_date(col("t"), "yyyy-MM-dd")),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("d"), "yyyy-MM-dd")),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+    withSQLConf(confKey -> "corrected") {
+      checkAnswer(
+        df.select(to_date(col("s"), "yyyy-MM-dd")),
+        Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+    }
+    // legacyParserPolicy is not respected by Gluten.
+    // withSQLConf(confKey -> "exception") {
+    //   checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+    // }
+
+    // now switch format
+    checkAnswer(
+      df.select(to_date(col("s"), "yyyy-dd-MM")),
+      Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+    // invalid format
+    checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+    // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+    // val e =
+    //   intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+    // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+    // assert(
+    //   e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+    // February
+    val x1 = "2016-02-29"
+    val x2 = "2017-02-29"
+    val df1 = Seq(x1, x2).toDF("x")
+    checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+  }
 }
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 407b9c8b95..f83b91ede1 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1084,6 +1084,9 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_timestamp")
     // Legacy mode is not supported, assuming this mode is not commonly used.
     .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+    // Legacy mode is not supported and velox getTimestamp function does not throw
+    // exception when format is "yyyy-dd-aa".
+    .exclude("function to_date")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de43..ae86c9d06e 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
         }
     }
   }
+
+  testGluten("function to_date") {
+    val d1 = Date.valueOf("2015-07-22")
+    val d2 = Date.valueOf("2015-07-01")
+    val d3 = Date.valueOf("2014-12-31")
+    val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+    val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val s1 = "2015-07-22 10:00:00"
+    val s2 = "2014-12-31"
+    val s3 = "2014-31-12"
+    val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+    checkAnswer(
+      df.select(to_date(col("t"))),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("d"))),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("s"))),
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+    checkAnswer(
+      df.selectExpr("to_date(t)"),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.selectExpr("to_date(d)"),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.selectExpr("to_date(s)"),
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+    // now with format
+    checkAnswer(
+      df.select(to_date(col("t"), "yyyy-MM-dd")),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("d"), "yyyy-MM-dd")),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+    withSQLConf(confKey -> "corrected") {
+      checkAnswer(
+        df.select(to_date(col("s"), "yyyy-MM-dd")),
+        Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+    }
+    // legacyParserPolicy is not respected by Gluten.
+    // withSQLConf(confKey -> "exception") {
+    //   checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+    // }
+
+    // now switch format
+    checkAnswer(
+      df.select(to_date(col("s"), "yyyy-dd-MM")),
+      Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+    // invalid format
+    checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+    // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+    // val e =
+    //   intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+    // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+    // assert(
+    //   e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+    // February
+    val x1 = "2016-02-29"
+    val x2 = "2017-02-29"
+    val df1 = Seq(x1, x2).toDF("x")
+    checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+  }
 }
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index dbb01fbe70..b0446d3ca7 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1101,6 +1101,9 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_timestamp")
     // Legacy mode is not supported, assuming this mode is not commonly used.
     .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+    // Legacy mode is not supported and velox getTimestamp function does not throw
+    // exception when format is "yyyy-dd-aa".
+    .exclude("function to_date")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de43..ae86c9d06e 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
         }
     }
   }
+
+  testGluten("function to_date") {
+    val d1 = Date.valueOf("2015-07-22")
+    val d2 = Date.valueOf("2015-07-01")
+    val d3 = Date.valueOf("2014-12-31")
+    val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+    val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val s1 = "2015-07-22 10:00:00"
+    val s2 = "2014-12-31"
+    val s3 = "2014-31-12"
+    val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+    checkAnswer(
+      df.select(to_date(col("t"))),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("d"))),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("s"))),
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+    checkAnswer(
+      df.selectExpr("to_date(t)"),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.selectExpr("to_date(d)"),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.selectExpr("to_date(s)"),
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+    // now with format
+    checkAnswer(
+      df.select(to_date(col("t"), "yyyy-MM-dd")),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("d"), "yyyy-MM-dd")),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+    withSQLConf(confKey -> "corrected") {
+      checkAnswer(
+        df.select(to_date(col("s"), "yyyy-MM-dd")),
+        Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+    }
+    // legacyParserPolicy is not respected by Gluten.
+    // withSQLConf(confKey -> "exception") {
+    //   checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+    // }
+
+    // now switch format
+    checkAnswer(
+      df.select(to_date(col("s"), "yyyy-dd-MM")),
+      Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+    // invalid format
+    checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+    // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+    // val e =
+    //   intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+    // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+    // assert(
+    //   e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+    // February
+    val x1 = "2016-02-29"
+    val x2 = "2017-02-29"
+    val df1 = Seq(x1, x2).toDF("x")
+    checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+  }
 }
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
index b8ac906d80..f2a83bf234 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
@@ -40,15 +40,12 @@ class GlutenCastSuite extends CastSuiteBase with GlutenTestsTrait {
 
   testGluten("missing cases - from boolean") {
     (DataTypeTestUtils.numericTypeWithoutDecimal + BooleanType).foreach {
-      t =>
-        t match {
-          case BooleanType =>
-            checkEvaluation(cast(cast(true, BooleanType), t), true)
-            checkEvaluation(cast(cast(false, BooleanType), t), false)
-          case _ =>
-            checkEvaluation(cast(cast(true, BooleanType), t), 1)
-            checkEvaluation(cast(cast(false, BooleanType), t), 0)
-        }
+      case t @ BooleanType =>
+        checkEvaluation(cast(cast(true, BooleanType), t), true)
+        checkEvaluation(cast(cast(false, BooleanType), t), false)
+      case t =>
+        checkEvaluation(cast(cast(true, BooleanType), t), 1)
+        checkEvaluation(cast(cast(false, BooleanType), t), 0)
     }
   }
 
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index f5a1a07695..a01d0cb4b3 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1123,6 +1123,9 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("to_timestamp")
     // Legacy mode is not supported, assuming this mode is not commonly used.
     .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+    // Legacy mode is not supported and velox getTimestamp function does not throw
+    // exception when format is "yyyy-dd-aa".
+    .exclude("function to_date")
   enableSuite[GlutenDeprecatedAPISuite]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
   enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de43..ae86c9d06e 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
         }
     }
   }
+
+  testGluten("function to_date") {
+    val d1 = Date.valueOf("2015-07-22")
+    val d2 = Date.valueOf("2015-07-01")
+    val d3 = Date.valueOf("2014-12-31")
+    val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+    val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+    val s1 = "2015-07-22 10:00:00"
+    val s2 = "2014-12-31"
+    val s3 = "2014-31-12"
+    val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+    checkAnswer(
+      df.select(to_date(col("t"))),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("d"))),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("s"))),
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+    checkAnswer(
+      df.selectExpr("to_date(t)"),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.selectExpr("to_date(d)"),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.selectExpr("to_date(s)"),
+      Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+    // now with format
+    checkAnswer(
+      df.select(to_date(col("t"), "yyyy-MM-dd")),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2014-12-31")),
+        Row(Date.valueOf("2014-12-31"))))
+    checkAnswer(
+      df.select(to_date(col("d"), "yyyy-MM-dd")),
+      Seq(
+        Row(Date.valueOf("2015-07-22")),
+        Row(Date.valueOf("2015-07-01")),
+        Row(Date.valueOf("2014-12-31"))))
+    val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+    withSQLConf(confKey -> "corrected") {
+      checkAnswer(
+        df.select(to_date(col("s"), "yyyy-MM-dd")),
+        Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+    }
+    // legacyParserPolicy is not respected by Gluten.
+    // withSQLConf(confKey -> "exception") {
+    //   checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+    // }
+
+    // now switch format
+    checkAnswer(
+      df.select(to_date(col("s"), "yyyy-dd-MM")),
+      Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+    // invalid format
+    checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+    // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+    // val e =
+    //   intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+    // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+    // assert(
+    //   e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+    // February
+    val x1 = "2016-02-29"
+    val x2 = "2017-02-29"
+    val df1 = Seq(x1, x2).toDF("x")
+    checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to