This is an automated email from the ASF dual-hosted git repository.

philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new f49fec78a6 [GLUTEN-7178][VL] Fix field not found error when struct field name contains upper case (#7304)
f49fec78a6 is described below

commit f49fec78a66f51baa498b2a80d85f5fe1ff0187f
Author: Mingliang Zhu <[email protected]>
AuthorDate: Tue Sep 24 10:51:38 2024 +0800

    [GLUTEN-7178][VL] Fix field not found error when struct field name contains upper case (#7304)
---
 .../gluten/backendsapi/clickhouse/CHBackend.scala      |  2 ++
 .../apache/gluten/execution/MiscOperatorSuite.scala    | 18 ++++++++++++++++++
 gluten-arrow/pom.xml                                   |  6 ++++++
 .../org/apache/spark/sql/utils/SparkArrowUtil.scala    | 13 +++++++++++--
 .../apache/gluten/backendsapi/BackendSettingsApi.scala |  2 ++
 .../org/apache/gluten/expression/ConverterUtils.scala  | 10 +++++++++-
 6 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala
index dbba7237d9..af668b15fe 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala
@@ -326,6 +326,8 @@ object CHBackendSettings extends BackendSettingsApi with Logging {
 
   override def supportStructType(): Boolean = true
 
+  override def structFieldToLowerCase(): Boolean = false
+
   override def supportExpandExec(): Boolean = true
 
   override def excludeScanExecFromCollapsedStage(): Boolean =
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
index fc56d049ff..0432c0da96 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
@@ -2094,4 +2094,22 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
       runQueryAndCompare("select col0 / (col1 + 1E-8) from t") { _ => }
     }
   }
+
+  test("Fix struct field case error") {
+    val excludedRules = "org.apache.spark.sql.catalyst.optimizer.PushDownPredicates," +
+      "org.apache.spark.sql.catalyst.optimizer.PushPredicateThroughNonJoin"
+    withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> excludedRules) {
+      withTempPath {
+        path =>
+          sql("select named_struct('A', a) as c1 from values (1), (2) as data(a)").write.parquet(
+            path.getAbsolutePath)
+          val df = spark.read
+            .parquet(path.getAbsolutePath)
+            .union(spark.read.parquet(path.getAbsolutePath))
+            .filter("c1.A > 1")
+            .select("c1.A")
+          checkAnswer(df, Seq(Row(2), Row(2)))
+      }
+    }
+  }
 }
diff --git a/gluten-arrow/pom.xml b/gluten-arrow/pom.xml
index ffba2682e9..2c734bd142 100644
--- a/gluten-arrow/pom.xml
+++ b/gluten-arrow/pom.xml
@@ -51,6 +51,12 @@
       <version>${project.version}</version>
       <scope>compile</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.gluten</groupId>
+      <artifactId>gluten-substrait</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
     <dependency>
       <groupId>io.glutenproject</groupId>
       <artifactId>protobuf-java</artifactId>
diff --git a/gluten-arrow/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala b/gluten-arrow/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
index ec6ac35af3..7c8261c2ad 100644
--- a/gluten-arrow/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
+++ b/gluten-arrow/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
@@ -16,6 +16,8 @@
  */
 package org.apache.spark.sql.utils
 
+import org.apache.gluten.expression.ConverterUtils
+
 import org.apache.spark.sql.types._
 
 import org.apache.arrow.vector.complex.MapVector
@@ -92,9 +94,16 @@ object SparkArrowUtil {
           name,
           fieldType,
           fields
-            .map(field => toArrowField(field.name, field.dataType, field.nullable, timeZoneId))
+            .map(
+              field =>
+                toArrowField(
+                  ConverterUtils.normalizeStructFieldName(field.name),
+                  field.dataType,
+                  field.nullable,
+                  timeZoneId))
             .toSeq
-            .asJava)
+            .asJava
+        )
       case MapType(keyType, valueType, valueContainsNull) =>
         val mapType = new FieldType(nullable, new ArrowType.Map(false), null)
         // Note: Map Type struct can not be null, Struct Type key field can not be null
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
index 3a2823ffb7..7d07431a87 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
@@ -68,6 +68,8 @@ trait BackendSettingsApi {
   }
   def supportStructType(): Boolean = false
 
+  def structFieldToLowerCase(): Boolean = true
+
   // Whether to fallback aggregate at the same time if its empty-output child is fallen back.
   def fallbackAggregateWithEmptyOutputChild(): Boolean = false
 
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
index 4b929e5251..730f96aad3 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
@@ -54,6 +54,14 @@ object ConverterUtils extends Logging {
     if (caseSensitive) name else name.toLowerCase(Locale.ROOT)
   }
 
+  def normalizeStructFieldName(name: String): String = {
+    if (BackendsApiManager.getSettings.structFieldToLowerCase()) {
+      normalizeColName(name)
+    } else {
+      name
+    }
+  }
+
   def getShortAttributeName(attr: Attribute): String = {
     val name = normalizeColName(attr.name)
     val subIndex = name.indexOf("(")
@@ -259,7 +267,7 @@ object ConverterUtils extends Logging {
         val fieldNames = new JArrayList[String]
         for (structField <- s.fields) {
           fieldNodes.add(getTypeNode(structField.dataType, structField.nullable))
-          fieldNames.add(structField.name)
+          fieldNames.add(normalizeStructFieldName(structField.name))
         }
         TypeBuilder.makeStruct(nullable, fieldNodes, fieldNames)
       case _: NullType =>


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to