This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new aab9ff0e1 [GLUTEN-5102][VL] Support cast date as timestamp (#5240)
aab9ff0e1 is described below
commit aab9ff0e187b7b6b76dc1668d5cb7c0de9c8a1d9
Author: Qian Sun <[email protected]>
AuthorDate: Sun Apr 7 14:11:07 2024 +0800
[GLUTEN-5102][VL] Support cast date as timestamp (#5240)
---
.../org/apache/gluten/execution/TestOperator.scala | 54 +++++++++++++++++++++-
.../substrait/SubstraitToVeloxPlanValidator.cc | 22 +++++----
2 files changed, 66 insertions(+), 10 deletions(-)
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala
index ad5e87436..08cf032b6 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala
@@ -22,10 +22,12 @@ import org.apache.gluten.sql.shims.SparkShimLoader
import org.apache.spark.SparkConf
import org.apache.spark.sql.{AnalysisException, Row}
import org.apache.spark.sql.execution.{FilterExec, GenerateExec, ProjectExec, RDDScanExec}
-import org.apache.spark.sql.functions.{avg, col, lit, udf}
+import org.apache.spark.sql.functions.{avg, col, lit, to_date, udf}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DecimalType, StringType, StructField, StructType}
+import java.util.concurrent.TimeUnit
+
import scala.collection.JavaConverters
class TestOperator extends VeloxWholeStageTransformerSuite {
@@ -1241,4 +1243,54 @@ class TestOperator extends VeloxWholeStageTransformerSuite {
}
}
}
+
+ test("Cast date to string") {
+ withTempPath {
+ path =>
+ Seq("2023-01-01", "2023-01-02", "2023-01-03")
+ .toDF("dateColumn")
+ .select(to_date($"dateColumn", "yyyy-MM-dd").as("dateColumn"))
+ .write
+ .parquet(path.getCanonicalPath)
+ spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("view")
+ runQueryAndCompare("SELECT cast(dateColumn as string) from view") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ }
+
+ test("Cast date to timestamp") {
+ withTempPath {
+ path =>
+ Seq("2023-01-01", "2023-01-02", "2023-01-03")
+ .toDF("dateColumn")
+ .select(to_date($"dateColumn", "yyyy-MM-dd").as("dateColumn"))
+ .write
+ .parquet(path.getCanonicalPath)
+ spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("view")
+ runQueryAndCompare("SELECT cast(dateColumn as timestamp) from view") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ }
+
+ test("cast date to timestamp with timezone") {
+ sql("SET spark.sql.session.timeZone = America/Los_Angeles")
+ val dfInLA = sql("SELECT cast(date'2023-01-02 01:01:01' as timestamp) as ts")
+
+ sql("SET spark.sql.session.timeZone = Asia/Shanghai")
+ val dfInSH = sql("SELECT cast(date'2023-01-02 01:01:01' as timestamp) as ts")
+
+ // Casting date to timestamp considers configured local timezone.
+ // There is 16-hour difference between America/Los_Angeles & Asia/Shanghai.
+ val timeInMillisInLA = dfInLA.collect()(0).getTimestamp(0).getTime()
+ val timeInMillisInSH = dfInSH.collect()(0).getTimestamp(0).getTime()
+ assert(TimeUnit.MILLISECONDS.toHours(timeInMillisInLA - timeInMillisInSH) == 16)
+
+ // check ProjectExecTransformer
+ val plan1 = dfInLA.queryExecution.executedPlan
+ val plan2 = dfInSH.queryExecution.executedPlan
+ assert(plan1.find(_.isInstanceOf[ProjectExecTransformer]).isDefined)
+ assert(plan2.find(_.isInstanceOf[ProjectExecTransformer]).isDefined)
+ }
}
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 43f808066..9c15ee374 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -263,20 +263,24 @@ bool SubstraitToVeloxPlanValidator::validateCast(
}
const auto& toType = SubstraitParser::parseType(castExpr.type());
- if (toType->kind() == TypeKind::TIMESTAMP || toType->isIntervalYearMonth()) {
- LOG_VALIDATION_MSG("Casting to " + toType->toString() + " is not supported.");
+ core::TypedExprPtr input = exprConverter_->toVeloxExpr(castExpr.input(), inputType);
+
+ // Only support cast from date to timestamp
+ if (toType->kind() == TypeKind::TIMESTAMP && !input->type()->isDate()) {
+ LOG_VALIDATION_MSG(
+ "Casting from " + input->type()->toString() + " to " + toType->toString() + " is not supported.");
return false;
}
- core::TypedExprPtr input = exprConverter_->toVeloxExpr(castExpr.input(), inputType);
+ if (toType->isIntervalYearMonth()) {
+ LOG_VALIDATION_MSG("Casting to " + toType->toString() + " is not supported.");
+ return false;
+ }
- // Casting from some types is not supported. See CastExpr::applyCast.
+ // Casting from some types is not supported. See CastExpr::applyPeeled.
if (input->type()->isDate()) {
- if (toType->kind() == TypeKind::TIMESTAMP) {
- LOG_VALIDATION_MSG("Casting from DATE to TIMESTAMP is not supported.");
- return false;
- }
- if (toType->kind() != TypeKind::VARCHAR) {
+ // Only support cast date to varchar & timestamp
+ if (toType->kind() != TypeKind::VARCHAR && toType->kind() != TypeKind::TIMESTAMP) {
LOG_VALIDATION_MSG("Casting from DATE to " + toType->toString() + " is not supported.");
return false;
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]