This is an automated email from the ASF dual-hosted git repository.

felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 8399ac7676 Use Substrait timestamp_tz for Spark TimestampType to 
preserve timezone-aware semantics (#11074)
8399ac7676 is described below

commit 8399ac7676555bf9918d9a304c882aa88f188525
Author: Joey <[email protected]>
AuthorDate: Sun Nov 16 07:52:10 2025 +0800

    Use Substrait timestamp_tz for Spark TimestampType to preserve 
timezone-aware semantics (#11074)
    
    Spark’s TimestampType is timezone-aware: it internally stores timestamps in 
UTC (either by converting input values to UTC based on the session time zone or 
by reading UTC timestamps directly from Parquet files) and represents an 
absolute point in time. These semantics align with Substrait’s timestamp_tz 
type, which also denotes a timezone-aware timestamp that can be unambiguously 
mapped to a moment on the timeline.
    
    To maintain semantic consistency between Spark and Substrait, this PR maps 
Spark’s TimestampType to Substrait’s timestamp_tz.
---
 cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp               | 6 +++---
 cpp-ch/local-engine/Parser/ExpressionParser.cpp                     | 4 ++--
 cpp-ch/local-engine/Parser/TypeParser.cpp                           | 4 ++--
 cpp/velox/substrait/SubstraitParser.cc                              | 4 ++--
 cpp/velox/substrait/SubstraitToVeloxExpr.cc                         | 2 +-
 cpp/velox/substrait/VeloxToSubstraitExpr.cc                         | 4 ++--
 cpp/velox/substrait/VeloxToSubstraitType.cc                         | 6 +++---
 .../apache/gluten/substrait/expression/TimestampLiteralNode.java    | 2 +-
 .../java/org/apache/gluten/substrait/type/TimestampTypeNode.java    | 4 ++--
 .../main/scala/org/apache/gluten/expression/ConverterUtils.scala    | 4 ++--
 10 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp 
b/cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp
index f90bd12176..3d80670087 100644
--- a/cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp
+++ b/cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp
@@ -107,7 +107,7 @@ SchemaPtr SerializedSchemaBuilder::build()
         else if (type == "Timestamp")
         {
             auto * t = type_struct->mutable_types()->Add();
-            t->mutable_timestamp()->set_nullability(
+            t->mutable_timestamp_tz()->set_nullability(
                 this->nullability_map[name] ? 
substrait::Type_Nullability_NULLABILITY_NULLABLE
                                             : 
substrait::Type_Nullability_NULLABILITY_REQUIRED);
         }
@@ -256,7 +256,7 @@ std::shared_ptr<substrait::Type> 
SerializedPlanBuilder::buildType(const DB::Data
         const auto * ch_type_datetime64 = 
checkAndGetDataType<DataTypeDateTime64>(ch_type_without_nullable.get());
         if (ch_type_datetime64->getScale() != 6)
             throw Exception(ErrorCodes::UNKNOWN_TYPE, "Spark doesn't support 
converting from {}", ch_type->getName());
-        res->mutable_timestamp()->set_nullability(type_nullability);
+        res->mutable_timestamp_tz()->set_nullability(type_nullability);
     }
     else if (which.isDate32())
         res->mutable_date()->set_nullability(type_nullability);
@@ -365,7 +365,7 @@ substrait::Expression * literalTimestamp(int64_t value)
 {
     substrait::Expression * rel = new substrait::Expression();
     auto * literal = rel->mutable_literal();
-    literal->set_timestamp(value);
+    literal->set_timestamp_tz(value);
     return rel;
 }
 
diff --git a/cpp-ch/local-engine/Parser/ExpressionParser.cpp 
b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
index 39aabcf2d2..12392a9dfe 100644
--- a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
+++ b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
@@ -112,9 +112,9 @@ std::pair<DB::DataTypePtr, DB::Field> 
LiteralParser::parse(const substrait::Expr
             field = literal.date();
             break;
         }
-        case substrait::Expression_Literal::kTimestamp: {
+        case substrait::Expression_Literal::kTimestampTz: {
             type = std::make_shared<DB::DataTypeDateTime64>(6);
-            field = DecimalField<DB::DateTime64>(literal.timestamp(), 6);
+            field = DecimalField<DB::DateTime64>(literal.timestamp_tz(), 6);
             break;
         }
         case substrait::Expression_Literal::kDecimal: {
diff --git a/cpp-ch/local-engine/Parser/TypeParser.cpp 
b/cpp-ch/local-engine/Parser/TypeParser.cpp
index 49e76fdb31..bdb8c52e9c 100644
--- a/cpp-ch/local-engine/Parser/TypeParser.cpp
+++ b/cpp-ch/local-engine/Parser/TypeParser.cpp
@@ -153,10 +153,10 @@ DB::DataTypePtr TypeParser::parseType(const 
substrait::Type & substrait_type, st
         ch_type = std::make_shared<DB::DataTypeFloat64>();
         ch_type = tryWrapNullable(substrait_type.fp64().nullability(), 
ch_type);
     }
-    else if (substrait_type.has_timestamp())
+    else if (substrait_type.has_timestamp_tz())
     {
         ch_type = std::make_shared<DB::DataTypeDateTime64>(6);
-        ch_type = tryWrapNullable(substrait_type.timestamp().nullability(), 
ch_type);
+        ch_type = tryWrapNullable(substrait_type.timestamp_tz().nullability(), 
ch_type);
     }
     else if (substrait_type.has_date())
     {
diff --git a/cpp/velox/substrait/SubstraitParser.cc 
b/cpp/velox/substrait/SubstraitParser.cc
index ed4ad36c68..2bc1dd71c3 100644
--- a/cpp/velox/substrait/SubstraitParser.cc
+++ b/cpp/velox/substrait/SubstraitParser.cc
@@ -76,7 +76,7 @@ TypePtr SubstraitParser::parseType(const ::substrait::Type& 
substraitType, bool
       return UNKNOWN();
     case ::substrait::Type::KindCase::kDate:
       return DATE();
-    case ::substrait::Type::KindCase::kTimestamp:
+    case ::substrait::Type::KindCase::kTimestampTz:
       return TIMESTAMP();
     case ::substrait::Type::KindCase::kDecimal: {
       auto precision = substraitType.decimal().precision();
@@ -368,7 +368,7 @@ bool SubstraitParser::getLiteralValue(const 
::substrait::Expression::Literal& li
 
 template <>
 Timestamp SubstraitParser::getLiteralValue(const 
::substrait::Expression::Literal& literal) {
-  return Timestamp::fromMicros(literal.timestamp());
+  return Timestamp::fromMicros(literal.timestamp_tz());
 }
 
 template <>
diff --git a/cpp/velox/substrait/SubstraitToVeloxExpr.cc 
b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
index fdee942eaa..25e78de709 100755
--- a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
@@ -131,7 +131,7 @@ TypePtr getScalarType(const 
::substrait::Expression::Literal& literal) {
     }
     case ::substrait::Expression_Literal::LiteralTypeCase::kDate:
       return DATE();
-    case ::substrait::Expression_Literal::LiteralTypeCase::kTimestamp:
+    case ::substrait::Expression_Literal::LiteralTypeCase::kTimestampTz:
       return TIMESTAMP();
     case ::substrait::Expression_Literal::LiteralTypeCase::kString:
       return VARCHAR();
diff --git a/cpp/velox/substrait/VeloxToSubstraitExpr.cc 
b/cpp/velox/substrait/VeloxToSubstraitExpr.cc
index f17fda06a2..66a2b4ff80 100644
--- a/cpp/velox/substrait/VeloxToSubstraitExpr.cc
+++ b/cpp/velox/substrait/VeloxToSubstraitExpr.cc
@@ -136,7 +136,7 @@ const ::substrait::Expression_Literal& 
toSubstraitNotNullLiteral(
     case velox::TypeKind::TIMESTAMP: {
       auto vTimeStamp = variantValue.value<TypeKind::TIMESTAMP>();
       auto micros = vTimeStamp.getSeconds() * 1000000 + vTimeStamp.getNanos() 
/ 1000;
-      literalExpr->set_timestamp(micros);
+      literalExpr->set_timestamp_tz(micros);
       break;
     }
     case velox::TypeKind::VARCHAR: {
@@ -250,7 +250,7 @@ const ::substrait::Expression_Literal& 
toSubstraitNotNullLiteral<TypeKind::TIMES
   ::substrait::Expression_Literal* literalExpr =
       
google::protobuf::Arena::CreateMessage<::substrait::Expression_Literal>(&arena);
   auto micros = value.getSeconds() * 1000000 + value.getNanos() / 1000;
-  literalExpr->set_timestamp(micros);
+  literalExpr->set_timestamp_tz(micros);
   literalExpr->set_nullable(false);
   return *literalExpr;
 }
diff --git a/cpp/velox/substrait/VeloxToSubstraitType.cc 
b/cpp/velox/substrait/VeloxToSubstraitType.cc
index b08fe83db6..b6bcf3bcc9 100644
--- a/cpp/velox/substrait/VeloxToSubstraitType.cc
+++ b/cpp/velox/substrait/VeloxToSubstraitType.cc
@@ -88,9 +88,9 @@ const ::substrait::Type& 
VeloxToSubstraitTypeConvertor::toSubstraitType(
       break;
     }
     case velox::TypeKind::TIMESTAMP: {
-      auto substraitTimestamp = 
google::protobuf::Arena::CreateMessage<::substrait::Type_Timestamp>(&arena);
-      
substraitTimestamp->set_nullability(::substrait::Type_Nullability_NULLABILITY_NULLABLE);
-      substraitType->set_allocated_timestamp(substraitTimestamp);
+      auto substraitTimestampTZ = 
google::protobuf::Arena::CreateMessage<::substrait::Type_TimestampTZ>(&arena);
+      
substraitTimestampTZ->set_nullability(::substrait::Type_Nullability_NULLABILITY_NULLABLE);
+      substraitType->set_allocated_timestamp_tz(substraitTimestampTZ);
       break;
     }
     case velox::TypeKind::ARRAY: {
diff --git 
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/TimestampLiteralNode.java
 
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/TimestampLiteralNode.java
index ec253edbc4..15e7254e17 100644
--- 
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/TimestampLiteralNode.java
+++ 
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/TimestampLiteralNode.java
@@ -32,6 +32,6 @@ public class TimestampLiteralNode extends 
LiteralNodeWithValue<Long> {
 
   @Override
   protected void updateLiteralBuilder(Builder literalBuilder, Long value) {
-    literalBuilder.setTimestamp(value);
+    literalBuilder.setTimestampTz(value);
   }
 }
diff --git 
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/type/TimestampTypeNode.java
 
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/type/TimestampTypeNode.java
index f25b5e900e..472df5da97 100644
--- 
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/type/TimestampTypeNode.java
+++ 
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/type/TimestampTypeNode.java
@@ -26,7 +26,7 @@ public class TimestampTypeNode extends TypeNode {
 
   @Override
   public Type toProtobuf() {
-    Type.Timestamp.Builder timestampBuilder = Type.Timestamp.newBuilder();
+    Type.TimestampTZ.Builder timestampBuilder = Type.TimestampTZ.newBuilder();
     if (nullable) {
       timestampBuilder.setNullability(Type.Nullability.NULLABILITY_NULLABLE);
     } else {
@@ -34,7 +34,7 @@ public class TimestampTypeNode extends TypeNode {
     }
 
     Type.Builder builder = Type.newBuilder();
-    builder.setTimestamp(timestampBuilder.build());
+    builder.setTimestampTz(timestampBuilder.build());
     return builder.build();
   }
 }
diff --git 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
index 1e217eb564..6db1f188d8 100644
--- 
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
+++ 
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
@@ -160,8 +160,8 @@ object ConverterUtils extends Logging {
         (StringType, isNullable(substraitType.getString.getNullability))
       case Type.KindCase.BINARY =>
         (BinaryType, isNullable(substraitType.getBinary.getNullability))
-      case Type.KindCase.TIMESTAMP =>
-        (TimestampType, isNullable(substraitType.getTimestamp.getNullability))
+      case Type.KindCase.TIMESTAMP_TZ =>
+        (TimestampType, 
isNullable(substraitType.getTimestampTz.getNullability))
       case Type.KindCase.DATE =>
         (DateType, isNullable(substraitType.getDate.getNullability))
       case Type.KindCase.DECIMAL =>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to