This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new aab9ff0e1 [GLUTEN-5102][VL] Support cast date as timestamp (#5240)
aab9ff0e1 is described below

commit aab9ff0e187b7b6b76dc1668d5cb7c0de9c8a1d9
Author: Qian Sun <[email protected]>
AuthorDate: Sun Apr 7 14:11:07 2024 +0800

    [GLUTEN-5102][VL] Support cast date as timestamp (#5240)
---
 .../org/apache/gluten/execution/TestOperator.scala | 54 +++++++++++++++++++++-
 .../substrait/SubstraitToVeloxPlanValidator.cc     | 22 +++++----
 2 files changed, 66 insertions(+), 10 deletions(-)

diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala
index ad5e87436..08cf032b6 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala
@@ -22,10 +22,12 @@ import org.apache.gluten.sql.shims.SparkShimLoader
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.{AnalysisException, Row}
 import org.apache.spark.sql.execution.{FilterExec, GenerateExec, ProjectExec, RDDScanExec}
-import org.apache.spark.sql.functions.{avg, col, lit, udf}
+import org.apache.spark.sql.functions.{avg, col, lit, to_date, udf}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{DecimalType, StringType, StructField, StructType}
 
+import java.util.concurrent.TimeUnit
+
 import scala.collection.JavaConverters
 
 class TestOperator extends VeloxWholeStageTransformerSuite {
@@ -1241,4 +1243,54 @@ class TestOperator extends VeloxWholeStageTransformerSuite {
       }
     }
   }
+
+  test("Cast date to string") {
+    withTempPath {
+      path =>
+        Seq("2023-01-01", "2023-01-02", "2023-01-03")
+          .toDF("dateColumn")
+          .select(to_date($"dateColumn", "yyyy-MM-dd").as("dateColumn"))
+          .write
+          .parquet(path.getCanonicalPath)
+        spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("view")
+        runQueryAndCompare("SELECT cast(dateColumn as string) from view") {
+          checkGlutenOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
+
+  test("Cast date to timestamp") {
+    withTempPath {
+      path =>
+        Seq("2023-01-01", "2023-01-02", "2023-01-03")
+          .toDF("dateColumn")
+          .select(to_date($"dateColumn", "yyyy-MM-dd").as("dateColumn"))
+          .write
+          .parquet(path.getCanonicalPath)
+        spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("view")
+        runQueryAndCompare("SELECT cast(dateColumn as timestamp) from view") {
+          checkGlutenOperatorMatch[ProjectExecTransformer]
+        }
+    }
+  }
+
+  test("cast date to timestamp with timezone") {
+    sql("SET spark.sql.session.timeZone = America/Los_Angeles")
+    val dfInLA = sql("SELECT cast(date'2023-01-02 01:01:01' as timestamp) as ts")
+
+    sql("SET spark.sql.session.timeZone = Asia/Shanghai")
+    val dfInSH = sql("SELECT cast(date'2023-01-02 01:01:01' as timestamp) as ts")
+
+    // Casting date to timestamp considers configured local timezone.
+    // There is 16-hour difference between America/Los_Angeles & Asia/Shanghai.
+    val timeInMillisInLA = dfInLA.collect()(0).getTimestamp(0).getTime()
+    val timeInMillisInSH = dfInSH.collect()(0).getTimestamp(0).getTime()
+    assert(TimeUnit.MILLISECONDS.toHours(timeInMillisInLA - timeInMillisInSH) == 16)
+
+    // check ProjectExecTransformer
+    val plan1 = dfInLA.queryExecution.executedPlan
+    val plan2 = dfInSH.queryExecution.executedPlan
+    assert(plan1.find(_.isInstanceOf[ProjectExecTransformer]).isDefined)
+    assert(plan2.find(_.isInstanceOf[ProjectExecTransformer]).isDefined)
+  }
 }
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 43f808066..9c15ee374 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -263,20 +263,24 @@ bool SubstraitToVeloxPlanValidator::validateCast(
   }
 
   const auto& toType = SubstraitParser::parseType(castExpr.type());
-  if (toType->kind() == TypeKind::TIMESTAMP || toType->isIntervalYearMonth()) {
-    LOG_VALIDATION_MSG("Casting to " + toType->toString() + " is not supported.");
+  core::TypedExprPtr input = exprConverter_->toVeloxExpr(castExpr.input(), inputType);
+
+  // Only support cast from date to timestamp
+  if (toType->kind() == TypeKind::TIMESTAMP && !input->type()->isDate()) {
+    LOG_VALIDATION_MSG(
+        "Casting from " + input->type()->toString() + " to " + toType->toString() + " is not supported.");
     return false;
   }
 
-  core::TypedExprPtr input = exprConverter_->toVeloxExpr(castExpr.input(), inputType);
+  if (toType->isIntervalYearMonth()) {
+    LOG_VALIDATION_MSG("Casting to " + toType->toString() + " is not supported.");
+    return false;
+  }
 
-  // Casting from some types is not supported. See CastExpr::applyCast.
+  // Casting from some types is not supported. See CastExpr::applyPeeled.
   if (input->type()->isDate()) {
-    if (toType->kind() == TypeKind::TIMESTAMP) {
-      LOG_VALIDATION_MSG("Casting from DATE to TIMESTAMP is not supported.");
-      return false;
-    }
-    if (toType->kind() != TypeKind::VARCHAR) {
+    // Only support cast date to varchar & timestamp
+    if (toType->kind() != TypeKind::VARCHAR && toType->kind() != TypeKind::TIMESTAMP) {
       LOG_VALIDATION_MSG("Casting from DATE to " + toType->toString() + " is not supported.");
       return false;
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to