This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 70c3259341 [GLUTEN-8581][VL] Fix Spark legacy date formatter under case insensitive configuration (#8583)
70c3259341 is described below
commit 70c32593419a4be48b5a066df379b1f91fcc6565
Author: Xiuli Wei <[email protected]>
AuthorDate: Sat Jan 25 19:22:06 2025 +0800
[GLUTEN-8581][VL] Fix Spark legacy date formatter under case insensitive configuration (#8583)
---
cpp/velox/compute/WholeStageResultIterator.cc | 3 +-
.../gluten/utils/velox/VeloxTestSettings.scala | 2 -
.../spark/sql/GlutenDateFunctionsSuite.scala | 63 ++--------------------
.../gluten/utils/velox/VeloxTestSettings.scala | 2 -
.../spark/sql/GlutenDateFunctionsSuite.scala | 62 ++-------------------
.../gluten/utils/velox/VeloxTestSettings.scala | 2 -
.../spark/sql/GlutenDateFunctionsSuite.scala | 61 ++-------------------
.../gluten/utils/velox/VeloxTestSettings.scala | 2 -
.../spark/sql/GlutenDateFunctionsSuite.scala | 62 ++-------------------
.../org/apache/gluten/config/GlutenConfig.scala | 7 +++
10 files changed, 25 insertions(+), 241 deletions(-)
diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc
index 20f5485664..f7062c71f3 100644
--- a/cpp/velox/compute/WholeStageResultIterator.cc
+++ b/cpp/velox/compute/WholeStageResultIterator.cc
@@ -555,7 +555,8 @@ std::unordered_map<std::string, std::string> WholeStageResultIterator::getQueryC
configs[velox::core::QueryConfig::kSparkPartitionId] =
std::to_string(taskInfo_.partitionId);
- // Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY'.
+ // Enable Spark legacy date formatter if spark.sql.legacy.timeParserPolicy is set to 'LEGACY'
+ // or 'legacy'
if (veloxCfg_->get<std::string>(kSparkLegacyTimeParserPolicy, "") == "LEGACY") {
configs[velox::core::QueryConfig::kSparkLegacyDateFormatter] = "true";
} else {
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index b08f4300f8..b7dbff4fb6 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -263,8 +263,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Unsupported datetime format: specifier X is not supported by velox.
.exclude("to_timestamp with microseconds precision")
- // Replaced by another test.
- .exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
// Legacy mode is not supported and velox getTimestamp function does not
throw
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index 5ddfe6fc1f..aa94dc50c4 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -114,19 +114,9 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"),
Seq(Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)),
Row(null)))
- // legacyParserPolicy is not respected by Gluten.
// invalid format
- // val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd
aa:HH:ss')")
- // if (legacyParserPolicy == "legacy") {
- // checkAnswer(invalid,
- // Seq(Row(null), Row(null), Row(null), Row(null)))
- // } else {
- // val e = intercept[SparkUpgradeException](invalid.collect())
- // assert(e.getCause.isInstanceOf[IllegalArgumentException])
- // assert(e.getMessage.contains(
- // "You may get a different result due to the upgrading to
Spark"))
- // }
-
+ val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd
aa:HH:ss')")
+ checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null)))
// February
val y1 = "2016-02-29"
val y2 = "2017-02-29"
@@ -198,53 +188,8 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")),
Seq(Row(secs(ts5.getTime)), Row(null)))
- // Not consistent behavior with gluten + velox.
- // invalid format
- // val invalid = df1.selectExpr(s"to_unix_timestamp(x,
'yyyy-MM-dd bb:HH:ss')")
- // val e =
intercept[IllegalArgumentException](invalid.collect())
- // assert(e.getMessage.contains('b'))
- }
- }
- }
-
- // Ported from spark with a test case for legacy mode removed.
- testGluten("to_timestamp") {
- Seq("legacy", "corrected").foreach {
- legacyParserPolicy =>
- withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key ->
legacyParserPolicy) {
- val date1 = Date.valueOf("2015-07-24")
- val date2 = Date.valueOf("2015-07-25")
- val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00")
- val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00")
- val ts1 = Timestamp.valueOf("2015-07-24 10:00:00")
- val ts2 = Timestamp.valueOf("2015-07-25 02:02:02")
- val s1 = "2015/07/24 10:00:00.5"
- val s2 = "2015/07/25 02:02:02.6"
- val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5")
- val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6")
- val ss1 = "2015-07-24 10:00:00"
- val ss2 = "2015-07-25 02:02:02"
- val fmt = "yyyy/MM/dd HH:mm:ss.S"
- val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d",
"ts", "s", "ss")
-
- checkAnswer(
- df.select(to_timestamp(col("ss"))),
- df.select(timestamp_seconds(unix_timestamp(col("ss")))))
- checkAnswer(df.select(to_timestamp(col("ss"))), Seq(Row(ts1),
Row(ts2)))
- if (legacyParserPolicy == "legacy") {
- // In Spark 2.4 and earlier, to_timestamp() parses in seconds
precision and cuts off
- // the fractional part of seconds. The behavior was changed by
SPARK-27438.
- // Ignore this test case. Velox returns null for such case.
- // val legacyFmt = "yyyy/MM/dd HH:mm:ss"
- // checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(
- // Row(ts1), Row(ts2)))
- } else {
- checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m),
Row(ts2m)))
- }
- checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(Row(ts1),
Row(ts2)))
- checkAnswer(
- df.select(to_timestamp(col("d"), "yyyy-MM-dd")),
- Seq(Row(ts_date1), Row(ts_date2)))
+ val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd
bb:HH:ss')")
+ checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null)))
}
}
}
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index e3fc20503b..079086f2b3 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1072,8 +1072,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Unsupported datetime format: specifier X is not supported by velox.
.exclude("to_timestamp with microseconds precision")
- // Replaced by another test.
- .exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
// Legacy mode is not supported and velox getTimestamp function does not
throw
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index ae86c9d06e..f9c5995caf 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -112,18 +112,9 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"),
Seq(Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)),
Row(null)))
- // legacyParserPolicy is not respected by Gluten.
// invalid format
- // val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd
aa:HH:ss')")
- // if (legacyParserPolicy == "legacy") {
- // checkAnswer(invalid,
- // Seq(Row(null), Row(null), Row(null), Row(null)))
- // } else {
- // val e = intercept[SparkUpgradeException](invalid.collect())
- // assert(e.getCause.isInstanceOf[IllegalArgumentException])
- // assert( e.getMessage.contains(
- // "You may get a different result due to the upgrading to
Spark"))
- // }
+ val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd
aa:HH:ss')")
+ checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null)))
// February
val y1 = "2016-02-29"
@@ -196,53 +187,8 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")),
Seq(Row(secs(ts5.getTime)), Row(null)))
- // Not consistent behavior with gluten + velox.
- // invalid format
- // val invalid = df1.selectExpr(s"to_unix_timestamp(x,
'yyyy-MM-dd bb:HH:ss')")
- // val e =
intercept[IllegalArgumentException](invalid.collect())
- // assert(e.getMessage.contains('b'))
- }
- }
- }
-
- // Ported from spark with a test case for legacy mode removed.
- testGluten("to_timestamp") {
- Seq("legacy", "corrected").foreach {
- legacyParserPolicy =>
- withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key ->
legacyParserPolicy) {
- val date1 = Date.valueOf("2015-07-24")
- val date2 = Date.valueOf("2015-07-25")
- val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00")
- val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00")
- val ts1 = Timestamp.valueOf("2015-07-24 10:00:00")
- val ts2 = Timestamp.valueOf("2015-07-25 02:02:02")
- val s1 = "2015/07/24 10:00:00.5"
- val s2 = "2015/07/25 02:02:02.6"
- val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5")
- val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6")
- val ss1 = "2015-07-24 10:00:00"
- val ss2 = "2015-07-25 02:02:02"
- val fmt = "yyyy/MM/dd HH:mm:ss.S"
- val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d",
"ts", "s", "ss")
-
- checkAnswer(
- df.select(to_timestamp(col("ss"))),
- df.select(timestamp_seconds(unix_timestamp(col("ss")))))
- checkAnswer(df.select(to_timestamp(col("ss"))), Seq(Row(ts1),
Row(ts2)))
- if (legacyParserPolicy == "legacy") {
- // In Spark 2.4 and earlier, to_timestamp() parses in seconds
precision and cuts off
- // the fractional part of seconds. The behavior was changed by
SPARK-27438.
- // Ignore this test case. Velox returns null for such case.
- // val legacyFmt = "yyyy/MM/dd HH:mm:ss"
- // checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(
- // Row(ts1), Row(ts2)))
- } else {
- checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m),
Row(ts2m)))
- }
- checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(Row(ts1),
Row(ts2)))
- checkAnswer(
- df.select(to_timestamp(col("d"), "yyyy-MM-dd")),
- Seq(Row(ts_date1), Row(ts_date2)))
+ val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd
bb:HH:ss')")
+ checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null)))
}
}
}
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 06e8309baf..24c5edeed7 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1102,8 +1102,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Unsupported datetime format: specifier X is not supported by velox.
.exclude("to_timestamp with microseconds precision")
- // Replaced by another test.
- .exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
// Legacy mode is not supported and velox getTimestamp function does not
throw
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index ae86c9d06e..d66f26d7c0 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -112,18 +112,9 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"),
Seq(Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)),
Row(null)))
- // legacyParserPolicy is not respected by Gluten.
// invalid format
- // val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd
aa:HH:ss')")
- // if (legacyParserPolicy == "legacy") {
- // checkAnswer(invalid,
- // Seq(Row(null), Row(null), Row(null), Row(null)))
- // } else {
- // val e = intercept[SparkUpgradeException](invalid.collect())
- // assert(e.getCause.isInstanceOf[IllegalArgumentException])
- // assert( e.getMessage.contains(
- // "You may get a different result due to the upgrading to
Spark"))
- // }
+ val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd
aa:HH:ss')")
+ checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null)))
// February
val y1 = "2016-02-29"
@@ -196,53 +187,9 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")),
Seq(Row(secs(ts5.getTime)), Row(null)))
- // Not consistent behavior with gluten + velox.
// invalid format
- // val invalid = df1.selectExpr(s"to_unix_timestamp(x,
'yyyy-MM-dd bb:HH:ss')")
- // val e =
intercept[IllegalArgumentException](invalid.collect())
- // assert(e.getMessage.contains('b'))
- }
- }
- }
-
- // Ported from spark with a test case for legacy mode removed.
- testGluten("to_timestamp") {
- Seq("legacy", "corrected").foreach {
- legacyParserPolicy =>
- withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key ->
legacyParserPolicy) {
- val date1 = Date.valueOf("2015-07-24")
- val date2 = Date.valueOf("2015-07-25")
- val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00")
- val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00")
- val ts1 = Timestamp.valueOf("2015-07-24 10:00:00")
- val ts2 = Timestamp.valueOf("2015-07-25 02:02:02")
- val s1 = "2015/07/24 10:00:00.5"
- val s2 = "2015/07/25 02:02:02.6"
- val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5")
- val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6")
- val ss1 = "2015-07-24 10:00:00"
- val ss2 = "2015-07-25 02:02:02"
- val fmt = "yyyy/MM/dd HH:mm:ss.S"
- val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d",
"ts", "s", "ss")
-
- checkAnswer(
- df.select(to_timestamp(col("ss"))),
- df.select(timestamp_seconds(unix_timestamp(col("ss")))))
- checkAnswer(df.select(to_timestamp(col("ss"))), Seq(Row(ts1),
Row(ts2)))
- if (legacyParserPolicy == "legacy") {
- // In Spark 2.4 and earlier, to_timestamp() parses in seconds
precision and cuts off
- // the fractional part of seconds. The behavior was changed by
SPARK-27438.
- // Ignore this test case. Velox returns null for such case.
- // val legacyFmt = "yyyy/MM/dd HH:mm:ss"
- // checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(
- // Row(ts1), Row(ts2)))
- } else {
- checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m),
Row(ts2m)))
- }
- checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(Row(ts1),
Row(ts2)))
- checkAnswer(
- df.select(to_timestamp(col("d"), "yyyy-MM-dd")),
- Seq(Row(ts_date1), Row(ts_date2)))
+ val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd
bb:HH:ss')")
+ checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null)))
}
}
}
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 30dda10999..c8a18d6881 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1124,8 +1124,6 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_unix_timestamp")
// Unsupported datetime format: specifier X is not supported by velox.
.exclude("to_timestamp with microseconds precision")
- // Replaced by another test.
- .exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
// Legacy mode is not supported and velox getTimestamp function does not
throw
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index ae86c9d06e..f9c5995caf 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -112,18 +112,9 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd mm:HH:ss')"),
Seq(Row(secs(ts4.getTime)), Row(null), Row(secs(ts3.getTime)),
Row(null)))
- // legacyParserPolicy is not respected by Gluten.
// invalid format
- // val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd
aa:HH:ss')")
- // if (legacyParserPolicy == "legacy") {
- // checkAnswer(invalid,
- // Seq(Row(null), Row(null), Row(null), Row(null)))
- // } else {
- // val e = intercept[SparkUpgradeException](invalid.collect())
- // assert(e.getCause.isInstanceOf[IllegalArgumentException])
- // assert( e.getMessage.contains(
- // "You may get a different result due to the upgrading to
Spark"))
- // }
+ val invalid = df1.selectExpr(s"unix_timestamp(x, 'yyyy-MM-dd
aa:HH:ss')")
+ checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null)))
// February
val y1 = "2016-02-29"
@@ -196,53 +187,8 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite
with GlutenSQLTestsTra
df2.select(unix_timestamp(col("y"), "yyyy-MM-dd")),
Seq(Row(secs(ts5.getTime)), Row(null)))
- // Not consistent behavior with gluten + velox.
- // invalid format
- // val invalid = df1.selectExpr(s"to_unix_timestamp(x,
'yyyy-MM-dd bb:HH:ss')")
- // val e =
intercept[IllegalArgumentException](invalid.collect())
- // assert(e.getMessage.contains('b'))
- }
- }
- }
-
- // Ported from spark with a test case for legacy mode removed.
- testGluten("to_timestamp") {
- Seq("legacy", "corrected").foreach {
- legacyParserPolicy =>
- withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key ->
legacyParserPolicy) {
- val date1 = Date.valueOf("2015-07-24")
- val date2 = Date.valueOf("2015-07-25")
- val ts_date1 = Timestamp.valueOf("2015-07-24 00:00:00")
- val ts_date2 = Timestamp.valueOf("2015-07-25 00:00:00")
- val ts1 = Timestamp.valueOf("2015-07-24 10:00:00")
- val ts2 = Timestamp.valueOf("2015-07-25 02:02:02")
- val s1 = "2015/07/24 10:00:00.5"
- val s2 = "2015/07/25 02:02:02.6"
- val ts1m = Timestamp.valueOf("2015-07-24 10:00:00.5")
- val ts2m = Timestamp.valueOf("2015-07-25 02:02:02.6")
- val ss1 = "2015-07-24 10:00:00"
- val ss2 = "2015-07-25 02:02:02"
- val fmt = "yyyy/MM/dd HH:mm:ss.S"
- val df = Seq((date1, ts1, s1, ss1), (date2, ts2, s2, ss2)).toDF("d",
"ts", "s", "ss")
-
- checkAnswer(
- df.select(to_timestamp(col("ss"))),
- df.select(timestamp_seconds(unix_timestamp(col("ss")))))
- checkAnswer(df.select(to_timestamp(col("ss"))), Seq(Row(ts1),
Row(ts2)))
- if (legacyParserPolicy == "legacy") {
- // In Spark 2.4 and earlier, to_timestamp() parses in seconds
precision and cuts off
- // the fractional part of seconds. The behavior was changed by
SPARK-27438.
- // Ignore this test case. Velox returns null for such case.
- // val legacyFmt = "yyyy/MM/dd HH:mm:ss"
- // checkAnswer(df.select(to_timestamp(col("s"), legacyFmt)), Seq(
- // Row(ts1), Row(ts2)))
- } else {
- checkAnswer(df.select(to_timestamp(col("s"), fmt)), Seq(Row(ts1m),
Row(ts2m)))
- }
- checkAnswer(df.select(to_timestamp(col("ts"), fmt)), Seq(Row(ts1),
Row(ts2)))
- checkAnswer(
- df.select(to_timestamp(col("d"), "yyyy-MM-dd")),
- Seq(Row(ts_date1), Row(ts_date2)))
+ val invalid = df1.selectExpr(s"to_unix_timestamp(x, 'yyyy-MM-dd
bb:HH:ss')")
+ checkAnswer(invalid, Seq(Row(null), Row(null), Row(null), Row(null)))
}
}
}
diff --git a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
index 0c5d39ec76..1cdc3d552a 100644
--- a/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/config/GlutenConfig.scala
@@ -669,6 +669,13 @@ object GlutenConfig {
SPARK_SHUFFLE_FILE_BUFFER,
(JavaUtils.byteStringAs(v, ByteUnit.KiB) * 1024).toString))
+ conf
+ .get(LEGACY_TIME_PARSER_POLICY.key)
+ .foreach(
+ v =>
+ nativeConfMap
+ .put(LEGACY_TIME_PARSER_POLICY.key, v.toUpperCase(Locale.ROOT)))
+
// Backend's dynamic session conf only.
val confPrefix = prefixOf(backendName)
conf
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]