This is an automated email from the ASF dual-hosted git repository.

zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 663ae51f5 [GLUTEN-6333][CH] Support rangepartitioning by timestamptype (#6336)
663ae51f5 is described below

commit 663ae51f5d42415769354be465a09609441ffa01
Author: Shuai li <[email protected]>
AuthorDate: Thu Jul 4 20:28:27 2024 +0800

    [GLUTEN-6333][CH] Support rangepartitioning by timestamptype (#6336)
    
    [CH] Support rangepartitioning by timestamptype
---
 .../utils/RangePartitionerBoundsGenerator.scala    |  2 ++
 .../GlutenClickHouseDatetimeExpressionSuite.scala  | 22 ++++++++++++++++++++++
 cpp-ch/local-engine/Parser/TypeParser.cpp          |  3 ++-
 cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp    |  5 +++++
 4 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/utils/RangePartitionerBoundsGenerator.scala b/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/utils/RangePartitionerBoundsGenerator.scala
index 61fbc86b3..87c6ae343 100644
--- a/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/utils/RangePartitionerBoundsGenerator.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/utils/RangePartitionerBoundsGenerator.scala
@@ -199,6 +199,7 @@ class RangePartitionerBoundsGenerator[K: Ordering: ClassTag, V](
             case d: DecimalType =>
               val decimal = row.getDecimal(i, d.precision, d.scale).toString()
               node.put("value", decimal)
+            case _: TimestampType => node.put("value", row.getLong(i))
             case _ =>
               throw new IllegalArgumentException(
                 s"Unsupported data type ${ordering.dataType.toString}")
@@ -244,6 +245,7 @@ object RangePartitionerBoundsGenerator {
       case _: StringType => true
       case _: DateType => true
       case _: DecimalType => true
+      case _: TimestampType => true
       case _ => false
     }
   }
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDatetimeExpressionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDatetimeExpressionSuite.scala
index 534166075..a1749efb1 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDatetimeExpressionSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDatetimeExpressionSuite.scala
@@ -162,4 +162,26 @@ class GlutenClickHouseDatetimeExpressionSuite
          |""".stripMargin
     compareResultsAgainstVanillaSpark(sql, true, { _ => })
   }
+
+  test("support range partition by timestamp") {
+    import testImplicits._
+    val df = Seq(
+      (1, Timestamp.valueOf("2015-07-22 10:01:40.123456")),
+      (2, Timestamp.valueOf("2014-12-31 05:29:06.123456")),
+      (3, Timestamp.valueOf("2015-07-22 16:01:40.123456")),
+      (4, Timestamp.valueOf("2012-02-29 23:01:40.123456"))
+    ).toDF("i", "t")
+    // Rows are deliberately out of order so that `order by t` has real sorting to do.
+    df.createOrReplaceTempView("test")
+    // Hints start with `/*+`; `/** ... */` is a plain comment that Spark ignores.
+    val sql =
+      s"""
+         | select
+         | /*+ repartition(2) */
+         | *
+         | from test
+         | order by t
+         |""".stripMargin
+    compareResultsAgainstVanillaSpark(sql, compareResult = true, { _ => })
+  }
 }
diff --git a/cpp-ch/local-engine/Parser/TypeParser.cpp b/cpp-ch/local-engine/Parser/TypeParser.cpp
index 3ad19bb2b..0d5e54bb1 100644
--- a/cpp-ch/local-engine/Parser/TypeParser.cpp
+++ b/cpp-ch/local-engine/Parser/TypeParser.cpp
@@ -59,7 +59,8 @@ std::unordered_map<String, String> TypeParser::type_names_mapping
        {"FloatType", "Float32"},
        {"DoubleType", "Float64"},
        {"StringType", "String"},
-       {"DateType", "Date32"}};
+       {"DateType", "Date32"},
+       {"TimestampType", "DateTime64"}};
 
 String TypeParser::getCHTypeName(const String & spark_type_name)
 {
diff --git a/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp b/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
index 7e3642dac..6804770c3 100644
--- a/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
+++ b/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
@@ -291,6 +291,11 @@ void RangeSelectorBuilder::initRangeBlock(Poco::JSON::Array::Ptr range_bounds)
                     int val = field_value.convert<Int32>();
                     col->insert(val);
                 }
+                else if (const auto * timestamp = dynamic_cast<const DB::DataTypeDateTime64 *>(type_info.inner_type.get()))
+                {
+                    auto value = field_value.convert<Int64>();
+                    col->insert(DecimalField<DateTime64>(value, 6));
+                }
                 else if (const auto * decimal32 = dynamic_cast<const DB::DataTypeDecimal<DB::Decimal32> *>(type_info.inner_type.get()))
                 {
                     auto value = decimal32->parseFromString(field_value.convert<std::string>());


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to