This is an automated email from the ASF dual-hosted git repository.
chengchengjin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new d168c49508 [VL] Support function ToPrettyString (#11566)
d168c49508 is described below
commit d168c49508451e80cce6cdfdc07c3eedd6b34dcb
Author: Jin Chengcheng <[email protected]>
AuthorDate: Tue Feb 24 21:41:55 2026 +0800
[VL] Support function ToPrettyString (#11566)
---
.../scala/org/apache/gluten/utils/CHExpressionUtil.scala | 1 +
.../org/apache/gluten/utils/velox/VeloxTestSettings.scala | 2 ++
.../scala/org/apache/spark/sql/GlutenDataFrameSuite.scala | 13 ++++++++++++-
.../org/apache/gluten/utils/velox/VeloxTestSettings.scala | 2 ++
.../scala/org/apache/spark/sql/GlutenDataFrameSuite.scala | 10 ++++++++++
.../org/apache/gluten/expression/ExpressionNames.scala | 1 +
.../org/apache/gluten/sql/shims/spark35/Spark35Shims.scala | 3 ++-
.../org/apache/gluten/sql/shims/spark40/Spark40Shims.scala | 3 ++-
.../org/apache/gluten/sql/shims/spark41/Spark41Shims.scala | 3 ++-
9 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
index 27d77d42cf..ddef6957a9 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala
@@ -174,6 +174,7 @@ object CHExpressionUtil {
final val CH_BLACKLIST_SCALAR_FUNCTION: Map[String, FunctionValidator] = Map(
SPLIT_PART -> DefaultValidator(),
+ TO_PRETTY_STRING -> DefaultValidator(),
TO_UNIX_TIMESTAMP -> UnixTimeStampValidator(),
UNIX_TIMESTAMP -> UnixTimeStampValidator(),
SEQUENCE -> SequenceValidator(),
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index e94e958a0f..a2deb7740f 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -882,6 +882,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("SPARK-41048: Improve output partitioning and ordering with AQE cache")
// Rewrite this test since it checks the physical operator which is changed in Gluten
.exclude("SPARK-27439: Explain result should match collected result after view change")
+ // https://github.com/apache/incubator-gluten/issues/11570
+ .exclude("getRows: binary")
enableSuite[GlutenDataFrameTimeWindowingSuite]
enableSuite[GlutenDataFrameTungstenSuite]
enableSuite[GlutenDataFrameWindowFunctionsSuite]
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDataFrameSuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDataFrameSuite.scala
index 069f308fdc..fd96768ded 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDataFrameSuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDataFrameSuite.scala
@@ -34,6 +34,8 @@ import scala.util.Random
class GlutenDataFrameSuite extends DataFrameSuite with GlutenSQLTestsTrait {
+ import testImplicits._
+
testGluten("repartitionByRange") {
val partitionNum = 10
withSQLConf(
@@ -235,6 +237,16 @@ class GlutenDataFrameSuite extends DataFrameSuite with GlutenSQLTestsTrait {
}
}
+ testGluten("getRows: binary") {
+ val df = Seq(
+ ("12".getBytes(StandardCharsets.UTF_8), "ABC.".getBytes(StandardCharsets.UTF_8)),
+ ("34".getBytes(StandardCharsets.UTF_8), "12346".getBytes(StandardCharsets.UTF_8))
+ ).toDF()
+ val expectedAnswer =
+ Seq(Seq("_1", "_2"), Seq("[31 32]", "[41 42 43 2E]"), Seq("[33 34]", "[31 32 33 34 36]"))
+ assert(df.getRows(10, 20) === expectedAnswer)
+ }
+
// TODO: fix in spark-4.0
/** Failed to check WholeStageCodegenExec, so we rewrite the UT. */
// testGluten("SPARK-22520: support code generation for large CaseWhen") {
@@ -252,7 +264,6 @@ class GlutenDataFrameSuite extends DataFrameSuite with GlutenSQLTestsTrait {
// // We check WholeStageTransformer instead of WholeStageCodegenExec
// assert(df.queryExecution.executedPlan.find(_.isInstanceOf[WholeStageTransformer]).isDefined)
// }
- import testImplicits._
private lazy val person2: DataFrame = Seq(
("Bob", 16, 176),
diff --git a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 0f2f10b24d..b247659b0d 100644
--- a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -852,6 +852,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("SPARK-41048: Improve output partitioning and ordering with AQE cache")
// Rewrite this test since it checks the physical operator which is changed in Gluten
.exclude("SPARK-27439: Explain result should match collected result after view change")
+ // https://github.com/apache/incubator-gluten/issues/11570
+ .exclude("getRows: binary")
enableSuite[GlutenDataFrameTimeWindowingSuite]
enableSuite[GlutenDataFrameTungstenSuite]
enableSuite[GlutenDataFrameWindowFunctionsSuite]
diff --git a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenDataFrameSuite.scala b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenDataFrameSuite.scala
index 069f308fdc..c474ecf7f6 100644
--- a/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenDataFrameSuite.scala
+++ b/gluten-ut/spark41/src/test/scala/org/apache/spark/sql/GlutenDataFrameSuite.scala
@@ -394,6 +394,16 @@ class GlutenDataFrameSuite extends DataFrameSuite with GlutenSQLTestsTrait {
}
}
+ testGluten("getRows: binary") {
+ val df = Seq(
+ ("12".getBytes(StandardCharsets.UTF_8), "ABC.".getBytes(StandardCharsets.UTF_8)),
+ ("34".getBytes(StandardCharsets.UTF_8), "12346".getBytes(StandardCharsets.UTF_8))
+ ).toDF()
+ val expectedAnswer =
+ Seq(Seq("_1", "_2"), Seq("[31 32]", "[41 42 43 2E]"), Seq("[33 34]", "[31 32 33 34 36]"))
+ assert(df.getRows(10, 20) === expectedAnswer)
+ }
+
// TODO: fix in spark-4.0
// private def withExpr(newExpr: Expression): Column = new Column(newExpr)
diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
index 741c1f7a19..2542e54adb 100644
--- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala
@@ -141,6 +141,7 @@ object ExpressionNames {
final val MASK = "mask"
final val FORMAT_STRING = "format_string"
final val LUHN_CHECK = "luhn_check"
+ final val TO_PRETTY_STRING = "to_pretty_string"
// URL functions
final val PARSE_URL = "parse_url"
diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
index 0619cea66a..0ee78c08a6 100644
--- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
+++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
@@ -93,7 +93,8 @@ class Spark35Shims extends SparkShims {
Sig[CheckOverflowInTableInsert](ExpressionNames.CHECK_OVERFLOW_IN_TABLE_INSERT),
Sig[ArrayAppend](ExpressionNames.ARRAY_APPEND),
Sig[UrlEncode](ExpressionNames.URL_ENCODE),
- Sig[UrlDecode](ExpressionNames.URL_DECODE)
+ Sig[UrlDecode](ExpressionNames.URL_DECODE),
+ Sig[ToPrettyString](ExpressionNames.TO_PRETTY_STRING)
)
}
diff --git a/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala b/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala
index 5d2d3258a7..fbe69ecc73 100644
--- a/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala
+++ b/shims/spark40/src/main/scala/org/apache/gluten/sql/shims/spark40/Spark40Shims.scala
@@ -97,7 +97,8 @@ class Spark40Shims extends SparkShims {
Sig[ArrayAppend](ExpressionNames.ARRAY_APPEND),
Sig[UrlEncode](ExpressionNames.URL_ENCODE),
Sig[KnownNotContainsNull](ExpressionNames.KNOWN_NOT_CONTAINS_NULL),
- Sig[UrlDecode](ExpressionNames.URL_DECODE)
+ Sig[UrlDecode](ExpressionNames.URL_DECODE),
+ Sig[ToPrettyString](ExpressionNames.TO_PRETTY_STRING)
)
}
diff --git a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
index bb3a7d7248..62f6b4d398 100644
--- a/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
+++ b/shims/spark41/src/main/scala/org/apache/gluten/sql/shims/spark41/Spark41Shims.scala
@@ -96,7 +96,8 @@ class Spark41Shims extends SparkShims {
Sig[ArrayAppend](ExpressionNames.ARRAY_APPEND),
Sig[UrlEncode](ExpressionNames.URL_ENCODE),
Sig[KnownNotContainsNull](ExpressionNames.KNOWN_NOT_CONTAINS_NULL),
- Sig[UrlDecode](ExpressionNames.URL_DECODE)
+ Sig[UrlDecode](ExpressionNames.URL_DECODE),
+ Sig[ToPrettyString](ExpressionNames.TO_PRETTY_STRING)
)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@gluten.apache.org
For additional commands, e-mail: commits-help@gluten.apache.org