This is an automated email from the ASF dual-hosted git repository.

taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 58f1cf680 [GLUTEN-6388][CH] Support function format (#6716)
58f1cf680 is described below

commit 58f1cf680a38fddac9f1fb77ce66239646d75822
Author: 李扬 <[email protected]>
AuthorDate: Fri Aug 9 15:23:49 2024 +0800

    [GLUTEN-6388][CH] Support function format (#6716)
    
    * support function printf
    
    * support function format_string
    
    * fix failed uts
    
    * fix failed uts
    
    * fix failed ut
---
 .../scala/org/apache/gluten/utils/CHExpressionUtil.scala  |  8 ++++++++
 .../gluten/execution/GlutenFunctionValidateSuite.scala    | 15 +++++++++++++++
 .../scalar_function_parser/CommonScalarFunctionParser.cpp |  5 +++--
 .../org/apache/gluten/expression/ExpressionMappings.scala |  1 +
 .../gluten/utils/clickhouse/ClickHouseTestSettings.scala  |  1 +
 .../gluten/utils/clickhouse/ClickHouseTestSettings.scala  |  1 +
 .../gluten/utils/clickhouse/ClickHouseTestSettings.scala  |  1 +
 .../gluten/utils/clickhouse/ClickHouseTestSettings.scala  |  1 +
 .../org/apache/gluten/expression/ExpressionNames.scala    |  1 +
 9 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index d65de1cea..ae072b0fb 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -166,6 +166,13 @@ case class ArrayJoinValidator() extends FunctionValidator {
   }
 }
 
+case class FormatStringValidator() extends FunctionValidator {
+  override def doValidate(expr: Expression): Boolean = {
+    val formatString = expr.asInstanceOf[FormatString]
+    formatString.children.head.isInstanceOf[Literal]
+  }
+}
+
 object CHExpressionUtil {
 
   final val CH_AGGREGATE_FUNC_BLACKLIST: Map[String, FunctionValidator] = Map(
@@ -199,6 +206,7 @@ object CHExpressionUtil {
     SPARK_PARTITION_ID -> DefaultValidator(),
     URL_DECODE -> DefaultValidator(),
     URL_ENCODE -> DefaultValidator(),
+    FORMAT_STRING -> FormatStringValidator(),
     SKEWNESS -> DefaultValidator(),
     SOUNDEX -> DefaultValidator(),
     MAKE_YM_INTERVAL -> DefaultValidator(),
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
index 7db5761a2..45485ac90 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
@@ -740,4 +740,19 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS
                 |""".stripMargin
     runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
   }
+
+  test("test function format_string") {
+    val sql = """
+                | SELECT
+                |  format_string(
+                |    'hello world %d %d %s %f',
+                |    id,
+                |    id,
+                |    CAST(id AS STRING),
+                |    CAST(id AS float)
+                |  )
+                |FROM range(10)
+                |""".stripMargin
+    runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
+  }
 }
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
index 726d1683d..9c3dc18ec 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
@@ -61,6 +61,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetTimestamp, get_timestamp, parseDateTim
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Quarter, quarter, toQuarter);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ToUnixTimestamp, to_unix_timestamp, parseDateTimeInJodaSyntaxOrNull);
 
+// math functions
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Position, positive, identity);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Negative, negative, negate);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Pmod, pmod, pmod);
@@ -107,6 +108,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Rand, rand, randCanonical);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Bin, bin, sparkBin);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Rint, rint, sparkRint);
 
+// string functions
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Like, like, like);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(NotLike, not_like, notLike);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(StartsWith, starts_with, startsWithUTF8);
@@ -130,6 +132,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Initcap, initcap, initcapUTF8);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Conv, conv, sparkConv);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Uuid, uuid, generateUUIDv4);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Levenshtein, levenshtein, editDistanceUTF8);
+REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FormatString, format_string, printf);
 
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Crc32, crc32, CRC32);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Murmur3Hash, murmur3hash, sparkMurmurHash3_32);
@@ -150,7 +153,6 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FloorDatetime, floor_datetime, dateTrunc)
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Floor, floor, sparkFloor);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MothsBetween, months_between, sparkMonthsBetween);
 
-
 // array functions
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Array, array, array);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Shuffle, shuffle, arrayShuffle);
@@ -165,7 +167,6 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapKeys, map_keys, mapKeys);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapValues, map_values, mapValues);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapFromArrays, map_from_arrays, mapFromArrays);
 
-
 // json functions
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FlattenJsonStringOnRequired, flattenJSONStringOnRequired, flattenJSONStringOnRequired);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ToJson, to_json, toJSONString);
diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index ebf0c5139..e0628f111 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -105,6 +105,7 @@ object ExpressionMappings {
     Sig[Levenshtein](LEVENSHTEIN),
     Sig[UnBase64](UNBASE64),
     Sig[Base64](BASE64),
+    Sig[FormatString](FORMAT_STRING),
 
     // URL functions
     Sig[ParseUrl](PARSE_URL),
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 8fd68d517..5c2833de4 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -912,6 +912,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("REPEAT")
     .exclude("ParseUrl")
     .exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
+    .exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
     .excludeGlutenTest("SPARK-40213: ascii for Latin-1 Supplement characters")
   enableSuite[GlutenTryCastSuite]
     .exclude("null cast")
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index f69598adf..c8e162e61 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -871,6 +871,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("REPEAT")
     .exclude("ParseUrl")
     .exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
+    .exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
   enableSuite[GlutenTryCastSuite]
     .exclude("null cast")
     .exclude("cast string to date")
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index ab288e835..77c12621e 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -775,6 +775,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("REPEAT")
     .exclude("ParseUrl")
     .exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
+    .exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
   enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
   enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite]
   enableSuite[GlutenDataSourceV2SQLSuiteV1Filter]
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index ab288e835..77c12621e 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -775,6 +775,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("REPEAT")
     .exclude("ParseUrl")
     .exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
+    .exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
   enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
   enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite]
   enableSuite[GlutenDataSourceV2SQLSuiteV1Filter]
diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index d47dbc4cc..96a615615 100644
--- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -132,6 +132,7 @@ object ExpressionNames {
   final val UNBASE64 = "unbase64"
   final val BASE64 = "base64"
   final val MASK = "mask"
+  final val FORMAT_STRING = "format_string"
 
   // URL functions
   final val PARSE_URL = "parse_url"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to