This is an automated email from the ASF dual-hosted git repository.
zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 663ae51f5 [GLUTEN-6333][CH] Support rangepartitioning by timestamptype (#6336)
663ae51f5 is described below
commit 663ae51f5d42415769354be465a09609441ffa01
Author: Shuai li <[email protected]>
AuthorDate: Thu Jul 4 20:28:27 2024 +0800
[GLUTEN-6333][CH] Support rangepartitioning by timestamptype (#6336)
[CH] Support rangepartitioning by timestamptype
---
.../utils/RangePartitionerBoundsGenerator.scala | 2 ++
.../GlutenClickHouseDatetimeExpressionSuite.scala | 22 ++++++++++++++++++++++
cpp-ch/local-engine/Parser/TypeParser.cpp | 3 ++-
cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp | 5 +++++
4 files changed, 31 insertions(+), 1 deletion(-)
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/utils/RangePartitionerBoundsGenerator.scala b/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/utils/RangePartitionerBoundsGenerator.scala
index 61fbc86b3..87c6ae343 100644
--- a/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/utils/RangePartitionerBoundsGenerator.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/utils/RangePartitionerBoundsGenerator.scala
@@ -199,6 +199,7 @@ class RangePartitionerBoundsGenerator[K: Ordering: ClassTag, V](
case d: DecimalType =>
val decimal = row.getDecimal(i, d.precision, d.scale).toString()
node.put("value", decimal)
+ case _: TimestampType => node.put("value", row.getLong(i))
case _ =>
throw new IllegalArgumentException(
s"Unsupported data type ${ordering.dataType.toString}")
@@ -244,6 +245,7 @@ object RangePartitionerBoundsGenerator {
case _: StringType => true
case _: DateType => true
case _: DecimalType => true
+ case _: TimestampType => true
case _ => false
}
}
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDatetimeExpressionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDatetimeExpressionSuite.scala
index 534166075..a1749efb1 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDatetimeExpressionSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDatetimeExpressionSuite.scala
@@ -162,4 +162,26 @@ class GlutenClickHouseDatetimeExpressionSuite
|""".stripMargin
compareResultsAgainstVanillaSpark(sql, true, { _ => })
}
+
+ test("support range partition by timestamp") {
+ import testImplicits._
+ val df = Seq(
+ (1, Timestamp.valueOf("2015-07-22 10:01:40.123456")),
+ (2, Timestamp.valueOf("2014-12-31 05:29:06.123456")),
+ (3, Timestamp.valueOf("2015-07-22 16:01:40.123456")),
+ (4, Timestamp.valueOf("2012-02-29 23:01:40.123456"))
+ ).toDF("i", "t")
+
+ df.createOrReplaceTempView("test")
+
+ val sql =
+ s"""
+ | select
+ | /** repartition(2) */
+ | *
+ | from test
+ | order by t
+ |""".stripMargin
+ compareResultsAgainstVanillaSpark(sql, compareResult = true, { _ => })
+ }
}
diff --git a/cpp-ch/local-engine/Parser/TypeParser.cpp b/cpp-ch/local-engine/Parser/TypeParser.cpp
index 3ad19bb2b..0d5e54bb1 100644
--- a/cpp-ch/local-engine/Parser/TypeParser.cpp
+++ b/cpp-ch/local-engine/Parser/TypeParser.cpp
@@ -59,7 +59,8 @@ std::unordered_map<String, String> TypeParser::type_names_mapping
{"FloatType", "Float32"},
{"DoubleType", "Float64"},
{"StringType", "String"},
- {"DateType", "Date32"}};
+ {"DateType", "Date32"},
+ {"TimestampType", "DateTime64"}};
String TypeParser::getCHTypeName(const String & spark_type_name)
{
diff --git a/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp b/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
index 7e3642dac..6804770c3 100644
--- a/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
+++ b/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp
@@ -291,6 +291,11 @@ void RangeSelectorBuilder::initRangeBlock(Poco::JSON::Array::Ptr range_bounds)
int val = field_value.convert<Int32>();
col->insert(val);
}
+ else if (const auto * timestamp = dynamic_cast<const
DB::DataTypeDateTime64 *>(type_info.inner_type.get()))
+ {
+ auto value = field_value.convert<Int64>();
+ col->insert(DecimalField<DateTime64>(value, 6));
+ }
else if (const auto * decimal32 = dynamic_cast<const
DB::DataTypeDecimal<DB::Decimal32> *>(type_info.inner_type.get()))
{
auto value =
decimal32->parseFromString(field_value.convert<std::string>());
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]