This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 36f0a8fc75 [GLUTEN-8215][VL] Support cast timestamp to date (#8212)
36f0a8fc75 is described below
commit 36f0a8fc75d08d409ffa538af8cc4781f97d15d0
Author: Mingliang Zhu <[email protected]>
AuthorDate: Tue Dec 17 14:32:24 2024 +0800
[GLUTEN-8215][VL] Support cast timestamp to date (#8212)
---
.../gluten/execution/MiscOperatorSuite.scala | 7 ++
.../substrait/SubstraitToVeloxPlanValidator.cc | 11 ++-
.../gluten/utils/velox/VeloxTestSettings.scala | 3 +
.../spark/sql/GlutenDateFunctionsSuite.scala | 89 ++++++++++++++++++++++
.../gluten/utils/velox/VeloxTestSettings.scala | 3 +
.../spark/sql/GlutenDateFunctionsSuite.scala | 89 ++++++++++++++++++++++
.../gluten/utils/velox/VeloxTestSettings.scala | 3 +
.../spark/sql/GlutenDateFunctionsSuite.scala | 89 ++++++++++++++++++++++
.../sql/catalyst/expressions/GlutenCastSuite.scala | 15 ++--
.../gluten/utils/velox/VeloxTestSettings.scala | 3 +
.../spark/sql/GlutenDateFunctionsSuite.scala | 89 ++++++++++++++++++++++
11 files changed, 388 insertions(+), 13 deletions(-)
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
index 8063a5d122..989def88e7 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
@@ -1791,6 +1791,13 @@ class MiscOperatorSuite extends
VeloxWholeStageTransformerSuite with AdaptiveSpa
assert(plan2.find(_.isInstanceOf[ProjectExecTransformer]).isDefined)
}
+ test("cast timestamp to date") {
+ val query = "select cast(ts as date) from values (timestamp'2024-01-01 00:00:00') as tab(ts)"
+ runQueryAndCompare(query) {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ }
+
test("timestamp broadcast join") {
spark.range(0, 5).createOrReplaceTempView("right")
spark.sql("SELECT id, timestamp_micros(id) as ts from right").createOrReplaceTempView("left")
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 84dfe68e2d..996b3bdce0 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -299,10 +299,13 @@ bool SubstraitToVeloxPlanValidator::validateCast(
case TypeKind::VARBINARY:
LOG_VALIDATION_MSG("Invalid input type in casting: ARRAY/MAP/ROW/VARBINARY.");
return false;
- case TypeKind::TIMESTAMP: {
- LOG_VALIDATION_MSG("Casting from TIMESTAMP is not supported or has incorrect result.");
- return false;
- }
+ case TypeKind::TIMESTAMP:
+ // Only support cast timestamp to date
+ if (!toType->isDate()) {
+ LOG_VALIDATION_MSG(
+ "Casting from TIMESTAMP to " + toType->toString() + " is not supported or has incorrect result.");
+ return false;
+ }
default: {
}
}
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 15495270a1..2c6b882850 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -265,6 +265,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDataFrameFunctionsSuite]
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index 8d1f7320dd..5ddfe6fc1f 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -248,4 +248,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 407b9c8b95..f83b91ede1 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1084,6 +1084,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de43..ae86c9d06e 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index dbb01fbe70..b0446d3ca7 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1101,6 +1101,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de43..ae86c9d06e 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
index b8ac906d80..f2a83bf234 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
@@ -40,15 +40,12 @@ class GlutenCastSuite extends CastSuiteBase with
GlutenTestsTrait {
testGluten("missing cases - from boolean") {
(DataTypeTestUtils.numericTypeWithoutDecimal + BooleanType).foreach {
- t =>
- t match {
- case BooleanType =>
- checkEvaluation(cast(cast(true, BooleanType), t), true)
- checkEvaluation(cast(cast(false, BooleanType), t), false)
- case _ =>
- checkEvaluation(cast(cast(true, BooleanType), t), 1)
- checkEvaluation(cast(cast(false, BooleanType), t), 0)
- }
+ case t @ BooleanType =>
+ checkEvaluation(cast(cast(true, BooleanType), t), true)
+ checkEvaluation(cast(cast(false, BooleanType), t), false)
+ case t =>
+ checkEvaluation(cast(cast(true, BooleanType), t), 1)
+ checkEvaluation(cast(cast(false, BooleanType), t), 0)
}
}
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index f5a1a07695..a01d0cb4b3 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1123,6 +1123,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de43..ae86c9d06e 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]