This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new fdd766a65 [VL] Enable make_timestamp Spark function (#4746)
fdd766a65 is described below
commit fdd766a656ae89aaae5f8ec85843c50af80faff1
Author: Rong Ma <[email protected]>
AuthorDate: Tue Mar 19 20:19:38 2024 +0800
[VL] Enable make_timestamp Spark function (#4746)
---
.../backendsapi/velox/SparkPlanExecApiImpl.scala | 10 ++++-
.../execution/VeloxFunctionsValidateSuite.scala | 43 ++++++++++++++++++++++
cpp/velox/jni/VeloxJniWrapper.cc | 5 ++-
.../backendsapi/SparkPlanExecApi.scala | 27 +++++++++-----
.../expression/ExpressionConverter.scala | 5 +++
.../expression/ExpressionMappings.scala | 1 +
.../glutenproject/expression/ExpressionNames.scala | 1 +
7 files changed, 80 insertions(+), 12 deletions(-)
diff --git a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/SparkPlanExecApiImpl.scala b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/SparkPlanExecApiImpl.scala
index e14ff795b..64980209c 100644
--- a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/SparkPlanExecApiImpl.scala
+++ b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/SparkPlanExecApiImpl.scala
@@ -137,7 +137,7 @@ class SparkPlanExecApiImpl extends SparkPlanExecApi {
GenericExpressionTransformer(substraitExprName, Seq(child), expr)
}
- /** Transform inline to Substrait. */
+ /** Transform posexplode to Substrait. */
override def genPosExplodeTransformer(
substraitExprName: String,
child: ExpressionTransformer,
@@ -154,6 +154,14 @@ class SparkPlanExecApiImpl extends SparkPlanExecApi {
GenericExpressionTransformer(substraitExprName, Seq(child), expr)
}
+ /** Transform make_timestamp to Substrait. */
+ override def genMakeTimestampTransformer(
+ substraitExprName: String,
+ children: Seq[ExpressionTransformer],
+ expr: Expression): ExpressionTransformer = {
+ GenericExpressionTransformer(substraitExprName, children, expr)
+ }
+
/**
* * Plans.
*/
diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala
index 187c93514..5c6def05e 100644
--- a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala
@@ -469,4 +469,47 @@ class VeloxFunctionsValidateSuite extends VeloxWholeStageTransformerSuite {
checkOperatorMatch[ProjectExecTransformer]
}
}
+
+ test("Test make_timestamp function") {
+ withTempPath {
+ path =>
+ // w/o timezone.
+ Seq(
+ (2017, 7, 11, 6, 30, Decimal(45678000, 18, 6)),
+ (1, 1, 1, 1, 1, Decimal(1, 18, 6)),
+ (1, 1, 1, 1, 1, null)
+ )
+ .toDF("year", "month", "day", "hour", "min", "sec")
+ .write
+ .parquet(path.getCanonicalPath)
+
+ spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("make_timestamp_tbl1")
+
+ runQueryAndCompare(
+ "select make_timestamp(year, month, day, hour, min, sec) from make_timestamp_tbl1") {
+ checkOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ withTempPath {
+ path =>
+ // w/ timezone.
+ Seq(
+ (2017, 7, 11, 6, 30, Decimal(45678000, 18, 6), "CET"),
+ (1, 1, 1, 1, 1, Decimal(1, 18, 6), null),
+ (1, 1, 1, 1, 1, null, "CST")
+ )
+ .toDF("year", "month", "day", "hour", "min", "sec", "timezone")
+ .write
+ .parquet(path.getCanonicalPath)
+
+ spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("make_timestamp_tbl2")
+
+ runQueryAndCompare("""
+ |select make_timestamp(year, month, day, hour, min, sec, timezone)
+ |from make_timestamp_tbl2
+ |""".stripMargin) {
+ checkOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ }
}
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc
index 063605001..cb1604629 100644
--- a/cpp/velox/jni/VeloxJniWrapper.cc
+++ b/cpp/velox/jni/VeloxJniWrapper.cc
@@ -109,8 +109,9 @@ Java_io_glutenproject_vectorized_PlanEvaluatorJniWrapper_nativeValidateWithFailu
::substrait::Plan subPlan;
gluten::parseProtobuf(planData, planSize, &subPlan);
- // A query context used for function validation.
- std::unordered_map<std::string, std::string> configs{{velox::core::QueryConfig::kSparkPartitionId, "0"}};
+ // A query context with dummy configs. Used for function validation.
+ std::unordered_map<std::string, std::string> configs{
+ {velox::core::QueryConfig::kSparkPartitionId, "0"}, {velox::core::QueryConfig::kSessionTimezone, "GMT"}};
velox::core::QueryCtx queryCtx(nullptr, velox::core::QueryConfig(configs));
auto pool = gluten::defaultLeafVeloxMemoryPool().get();
// An execution context used for function validation.
diff --git a/gluten-core/src/main/scala/io/glutenproject/backendsapi/SparkPlanExecApi.scala b/gluten-core/src/main/scala/io/glutenproject/backendsapi/SparkPlanExecApi.scala
index b4466cf20..ba717ec00 100644
--- a/gluten-core/src/main/scala/io/glutenproject/backendsapi/SparkPlanExecApi.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/backendsapi/SparkPlanExecApi.scala
@@ -183,14 +183,6 @@ trait SparkPlanExecApi {
rightNode: ExpressionNode,
original: GetArrayItem): ExpressionNode
- def genPosExplodeTransformer(
- substraitExprName: String,
- child: ExpressionTransformer,
- original: PosExplode,
- attributeSeq: Seq[Attribute]): ExpressionTransformer = {
- PosExplodeTransformer(substraitExprName, child, original, attributeSeq)
- }
-
/** Transform NaNvl to Substrait. */
def genNaNvlTransformer(
substraitExprName: String,
@@ -213,7 +205,24 @@ trait SparkPlanExecApi {
substraitExprName: String,
child: ExpressionTransformer,
expr: Expression): ExpressionTransformer = {
- throw new GlutenNotSupportException("map_entries is not supported")
+ throw new GlutenNotSupportException("inline is not supported")
+ }
+
+ /** Transform posexplode to Substrait. */
+ def genPosExplodeTransformer(
+ substraitExprName: String,
+ child: ExpressionTransformer,
+ original: PosExplode,
+ attributeSeq: Seq[Attribute]): ExpressionTransformer = {
+ PosExplodeTransformer(substraitExprName, child, original, attributeSeq)
+ }
+
+ /** Transform make_timestamp to Substrait. */
+ def genMakeTimestampTransformer(
+ substraitExprName: String,
+ children: Seq[ExpressionTransformer],
+ expr: Expression): ExpressionTransformer = {
+ throw new GlutenNotSupportException("make_timestamp is not supported")
}
/**
diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
index b13786a11..dd9fbed1c 100644
--- a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
@@ -523,6 +523,11 @@ object ExpressionConverter extends SQLConfHelper with Logging {
replaceWithExpressionTransformerInternal(n.right, attributeSeq, expressionsMap),
n
)
+ case m: MakeTimestamp =>
+ BackendsApiManager.getSparkPlanExecApiInstance.genMakeTimestampTransformer(
+ substraitExprName,
+ m.children.map(replaceWithExpressionTransformerInternal(_, attributeSeq, expressionsMap)),
+ m)
case e: Transformable =>
val childrenTransformers =
e.children.map(replaceWithExpressionTransformerInternal(_, attributeSeq, expressionsMap))
diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
index c0ff0e870..e85f8d4e2 100644
--- a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
@@ -177,6 +177,7 @@ object ExpressionMappings {
Sig[LastDay](LAST_DAY),
Sig[MonthsBetween](MONTHS_BETWEEN),
Sig[DateFromUnixDate](DATE_FROM_UNIX_DATE),
+ Sig[MakeTimestamp](MAKE_TIMESTAMP),
// JSON functions
Sig[GetJsonObject](GET_JSON_OBJECT),
Sig[LengthOfJsonArray](JSON_ARRAY_LENGTH),
diff --git a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
index 91c68e903..4691c16d3 100644
--- a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
+++ b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
@@ -188,6 +188,7 @@ object ExpressionNames {
final val LAST_DAY = "last_day"
final val MONTHS_BETWEEN = "months_between"
final val DATE_FROM_UNIX_DATE = "date_from_unix_date"
+ final val MAKE_TIMESTAMP = "make_timestamp"
// JSON functions
final val GET_JSON_OBJECT = "get_json_object"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]