This is an automated email from the ASF dual-hosted git repository.
taiyangli pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 58f1cf680 [GLUTEN-6388][CH] Support function format (#6716)
58f1cf680 is described below
commit 58f1cf680a38fddac9f1fb77ce66239646d75822
Author: 李扬 <[email protected]>
AuthorDate: Fri Aug 9 15:23:49 2024 +0800
[GLUTEN-6388][CH] Support function format (#6716)
* support function printf
* support function format_string
* fix failed uts
* fix failed uts
* fix failed ut
---
.../scala/org/apache/gluten/utils/CHExpressionUtil.scala | 8 ++++++++
.../gluten/execution/GlutenFunctionValidateSuite.scala | 15 +++++++++++++++
.../scalar_function_parser/CommonScalarFunctionParser.cpp | 5 +++--
.../org/apache/gluten/expression/ExpressionMappings.scala | 1 +
.../gluten/utils/clickhouse/ClickHouseTestSettings.scala | 1 +
.../gluten/utils/clickhouse/ClickHouseTestSettings.scala | 1 +
.../gluten/utils/clickhouse/ClickHouseTestSettings.scala | 1 +
.../gluten/utils/clickhouse/ClickHouseTestSettings.scala | 1 +
.../org/apache/gluten/expression/ExpressionNames.scala | 1 +
9 files changed, 32 insertions(+), 2 deletions(-)
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index d65de1cea..ae072b0fb 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -166,6 +166,13 @@ case class ArrayJoinValidator() extends FunctionValidator {
}
}
+case class FormatStringValidator() extends FunctionValidator {
+ override def doValidate(expr: Expression): Boolean = {
+ val formatString = expr.asInstanceOf[FormatString]
+ formatString.children.head.isInstanceOf[Literal]
+ }
+}
+
object CHExpressionUtil {
final val CH_AGGREGATE_FUNC_BLACKLIST: Map[String, FunctionValidator] = Map(
@@ -199,6 +206,7 @@ object CHExpressionUtil {
SPARK_PARTITION_ID -> DefaultValidator(),
URL_DECODE -> DefaultValidator(),
URL_ENCODE -> DefaultValidator(),
+ FORMAT_STRING -> FormatStringValidator(),
SKEWNESS -> DefaultValidator(),
SOUNDEX -> DefaultValidator(),
MAKE_YM_INTERVAL -> DefaultValidator(),
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
index 7db5761a2..45485ac90 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenFunctionValidateSuite.scala
@@ -740,4 +740,19 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS
|""".stripMargin
runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
}
+
+ test("test function format_string") {
+ val sql = """
+ | SELECT
+ | format_string(
+ | 'hello world %d %d %s %f',
+ | id,
+ | id,
+ | CAST(id AS STRING),
+ | CAST(id AS float)
+ | )
+ |FROM range(10)
+ |""".stripMargin
+ runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
+ }
}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
index 726d1683d..9c3dc18ec 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
@@ -61,6 +61,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetTimestamp, get_timestamp, parseDateTim
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Quarter, quarter, toQuarter);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ToUnixTimestamp, to_unix_timestamp, parseDateTimeInJodaSyntaxOrNull);
+// math functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Position, positive, identity);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Negative, negative, negate);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Pmod, pmod, pmod);
@@ -107,6 +108,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Rand, rand, randCanonical);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Bin, bin, sparkBin);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Rint, rint, sparkRint);
+// string functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Like, like, like);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(NotLike, not_like, notLike);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(StartsWith, starts_with, startsWithUTF8);
@@ -130,6 +132,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Initcap, initcap, initcapUTF8);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Conv, conv, sparkConv);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Uuid, uuid, generateUUIDv4);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Levenshtein, levenshtein, editDistanceUTF8);
+REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FormatString, format_string, printf);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Crc32, crc32, CRC32);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Murmur3Hash, murmur3hash, sparkMurmurHash3_32);
@@ -150,7 +153,6 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FloorDatetime, floor_datetime, dateTrunc)
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Floor, floor, sparkFloor);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MothsBetween, months_between, sparkMonthsBetween);
-
// array functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Array, array, array);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Shuffle, shuffle, arrayShuffle);
@@ -165,7 +167,6 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapKeys, map_keys, mapKeys);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapValues, map_values, mapValues);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(MapFromArrays, map_from_arrays, mapFromArrays);
-
// json functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(FlattenJsonStringOnRequired, flattenJSONStringOnRequired, flattenJSONStringOnRequired);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ToJson, to_json, toJSONString);
diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
index ebf0c5139..e0628f111 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala
@@ -105,6 +105,7 @@ object ExpressionMappings {
Sig[Levenshtein](LEVENSHTEIN),
Sig[UnBase64](UNBASE64),
Sig[Base64](BASE64),
+ Sig[FormatString](FORMAT_STRING),
// URL functions
Sig[ParseUrl](PARSE_URL),
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 8fd68d517..5c2833de4 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -912,6 +912,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
+ .exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
.excludeGlutenTest("SPARK-40213: ascii for Latin-1 Supplement characters")
enableSuite[GlutenTryCastSuite]
.exclude("null cast")
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index f69598adf..c8e162e61 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -871,6 +871,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
+ .exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
enableSuite[GlutenTryCastSuite]
.exclude("null cast")
.exclude("cast string to date")
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index ab288e835..77c12621e 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -775,6 +775,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
+ .exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSuiteV1Filter]
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index ab288e835..77c12621e 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -775,6 +775,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("REPEAT")
.exclude("ParseUrl")
.exclude("SPARK-33468: ParseUrl in ANSI mode should fail if input string is not a valid url")
+ .exclude("FORMAT") // refer https://github.com/apache/incubator-gluten/issues/6765
enableSuite[GlutenDataSourceV2DataFrameSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSessionCatalogSuite]
enableSuite[GlutenDataSourceV2SQLSuiteV1Filter]
diff --git
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index d47dbc4cc..96a615615 100644
---
a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++
b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -132,6 +132,7 @@ object ExpressionNames {
final val UNBASE64 = "unbase64"
final val BASE64 = "base64"
final val MASK = "mask"
+ final val FORMAT_STRING = "format_string"
// URL functions
final val PARSE_URL = "parse_url"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]