This is an automated email from the ASF dual-hosted git repository.
mingliang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new ce75c4fd18 [GLUTEN-10521][VL] Fall back `to_json` function for
uppercase struct field name (#10523)
ce75c4fd18 is described below
commit ce75c4fd180d69096da847944b66d9feb32acb41
Author: Mingliang Zhu <[email protected]>
AuthorDate: Tue Aug 26 18:18:30 2025 +0800
[GLUTEN-10521][VL] Fall back `to_json` function for uppercase struct field
name (#10523)
---
.../backendsapi/velox/VeloxSparkPlanExecApi.scala | 8 ++++++++
.../gluten/expression/ExpressionRestrictions.scala | 7 ++++++-
.../functions/JsonFunctionsValidateSuite.scala | 20 ++++++++++++++++----
.../apache/gluten/expression/ExpressionUtils.scala | 11 +++++++++++
4 files changed, 41 insertions(+), 5 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index aaa6836f14..c3ac63f767 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -817,6 +817,14 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
ExpressionNames.TO_JSON,
ToJsonRestrictions.NOT_SUPPORT_WITH_OPTIONS)
}
+ if (
+ !SQLConf.get.caseSensitiveAnalysis &&
+ ExpressionUtils.hasUppercaseStructFieldName(child.dataType)
+ ) {
+ GlutenExceptionUtil.throwsNotFullySupported(
+ ExpressionNames.TO_JSON,
+ ToJsonRestrictions.NOT_SUPPORT_UPPERCASE_STRUCT)
+ }
ToJsonTransformer(substraitExprName, child, expr)
}
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionRestrictions.scala
b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionRestrictions.scala
index 569da7d2fc..af16e5ed17 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionRestrictions.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionRestrictions.scala
@@ -64,9 +64,14 @@ object ToJsonRestrictions extends ExpressionRestrictions {
val NOT_SUPPORT_WITH_OPTIONS: String =
s"${ExpressionNames.TO_JSON} with options is not supported in Velox"
+ val NOT_SUPPORT_UPPERCASE_STRUCT: String =
+ s"When 'spark.sql.caseSensitive = false', ${ExpressionNames.TO_JSON}
produces unexpected" +
+ s" result for struct field with uppercase name"
+
override val functionName: String = ExpressionNames.TO_JSON
- override val restrictionMessages: Array[String] =
Array(NOT_SUPPORT_WITH_OPTIONS)
+ override val restrictionMessages: Array[String] =
+ Array(NOT_SUPPORT_WITH_OPTIONS, NOT_SUPPORT_UPPERCASE_STRUCT)
}
object Unbase64Restrictions extends ExpressionRestrictions {
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/functions/JsonFunctionsValidateSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/functions/JsonFunctionsValidateSuite.scala
index fd54502e45..c387c5c234 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/functions/JsonFunctionsValidateSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/functions/JsonFunctionsValidateSuite.scala
@@ -380,10 +380,14 @@ class JsonFunctionsValidateSuite extends
FunctionsValidateSuite {
test("to_json function") {
withTable("t") {
- spark.sql("create table t (a int, b string, c array<int>, d map<int,
string>) using parquet")
- spark.sql("""insert into t values (1, 'str', array(1,2,3), map(1, 'v')),
- |(2, 'str2', array(), map(1, 'v1', 2, 'v2')),
- |(3, '', array(1), map())
+ spark.sql(
+ """
+ |create table t (a int, b string, c array<int>, d map<int, string>,
e struct<aA: int>)
+ |using parquet
+ |""".stripMargin)
+ spark.sql("""insert into t values (1, 'str', array(1,2,3), map(1, 'v'),
struct(1)),
+ |(2, 'str2', array(), map(1, 'v1', 2, 'v2'), struct(2)),
+ |(3, '', array(1), map(), struct(null))
|""".stripMargin)
runQueryAndCompare("select to_json(named_struct('a', a, 'b', b, 'c', c,
'd', d)) from t") {
@@ -397,6 +401,14 @@ class JsonFunctionsValidateSuite extends
FunctionsValidateSuite {
runQueryAndCompare("select to_json(d) from t") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
+
+ runQueryAndCompare("select to_json(e) from t") {
+ checkSparkOperatorMatch[ProjectExec]
+ }
+
+ runQueryAndCompare("select to_json(Array(named_struct('aA', a))) from
t") {
+ checkSparkOperatorMatch[ProjectExec]
+ }
}
}
}
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionUtils.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionUtils.scala
index 92c73f528c..77af7491d4 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionUtils.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ExpressionUtils.scala
@@ -18,6 +18,7 @@ package org.apache.gluten.expression
import org.apache.spark.sql.catalyst.expressions.{Expression, LeafExpression}
import org.apache.spark.sql.execution.SparkPlan
+import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructType}
object ExpressionUtils {
@@ -40,4 +41,14 @@ object ExpressionUtils {
def hasComplexExpressions(plan: SparkPlan, threshold: Int): Boolean = {
hasComplexExpressions(plan.expressions, threshold)
}
+
+ /**
+  * Checks whether `dataType` contains a struct field whose name has an uppercase character.
+  *
+  * The search is recursive: it descends into array element types, map key and value types,
+  * and the data types of struct fields, so an uppercase name nested at any depth is found.
+  *
+  * @param dataType
+  *   the type to inspect
+  * @return
+  *   true if any struct field name reachable from `dataType` contains an uppercase character,
+  *   false otherwise
+  */
+ def hasUppercaseStructFieldName(dataType: DataType): Boolean = {
+   dataType match {
+     case StructType(fields) =>
+       // Check the field's own name, then descend into its type so that nested structs
+       // such as struct<a: struct<B: int>> are not missed.
+       fields.exists(
+         field => field.name.exists(_.isUpper) || hasUppercaseStructFieldName(field.dataType))
+     case ArrayType(elementType, _) => hasUppercaseStructFieldName(elementType)
+     case MapType(keyType, valueType, _) =>
+       hasUppercaseStructFieldName(keyType) || hasUppercaseStructFieldName(valueType)
+     case _ => false
+   }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]