This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new f49fec78a6 [GLUTEN-7178][VL] Fix field not found error when struct
field name contains upper case (#7304)
f49fec78a6 is described below
commit f49fec78a66f51baa498b2a80d85f5fe1ff0187f
Author: Mingliang Zhu <[email protected]>
AuthorDate: Tue Sep 24 10:51:38 2024 +0800
[GLUTEN-7178][VL] Fix field not found error when struct field name contains
upper case (#7304)
---
.../gluten/backendsapi/clickhouse/CHBackend.scala | 2 ++
.../apache/gluten/execution/MiscOperatorSuite.scala | 18 ++++++++++++++++++
gluten-arrow/pom.xml | 6 ++++++
.../org/apache/spark/sql/utils/SparkArrowUtil.scala | 13 +++++++++++--
.../apache/gluten/backendsapi/BackendSettingsApi.scala | 2 ++
.../org/apache/gluten/expression/ConverterUtils.scala | 10 +++++++++-
6 files changed, 48 insertions(+), 3 deletions(-)
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala
index dbba7237d9..af668b15fe 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala
@@ -326,6 +326,8 @@ object CHBackendSettings extends BackendSettingsApi with Logging {
override def supportStructType(): Boolean = true
+ override def structFieldToLowerCase(): Boolean = false
+
override def supportExpandExec(): Boolean = true
override def excludeScanExecFromCollapsedStage(): Boolean =
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
index fc56d049ff..0432c0da96 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
@@ -2094,4 +2094,22 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
runQueryAndCompare("select col0 / (col1 + 1E-8) from t") { _ => }
}
}
+
+ test("Fix struct field case error") {
+ val excludedRules = "org.apache.spark.sql.catalyst.optimizer.PushDownPredicates," +
+ "org.apache.spark.sql.catalyst.optimizer.PushPredicateThroughNonJoin"
+ withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> excludedRules) {
+ withTempPath {
+ path =>
+ sql("select named_struct('A', a) as c1 from values (1), (2) as data(a)").write.parquet(
+ path.getAbsolutePath)
+ val df = spark.read
+ .parquet(path.getAbsolutePath)
+ .union(spark.read.parquet(path.getAbsolutePath))
+ .filter("c1.A > 1")
+ .select("c1.A")
+ checkAnswer(df, Seq(Row(2), Row(2)))
+ }
+ }
+ }
}
diff --git a/gluten-arrow/pom.xml b/gluten-arrow/pom.xml
index ffba2682e9..2c734bd142 100644
--- a/gluten-arrow/pom.xml
+++ b/gluten-arrow/pom.xml
@@ -51,6 +51,12 @@
<version>${project.version}</version>
<scope>compile</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.gluten</groupId>
+ <artifactId>gluten-substrait</artifactId>
+ <version>${project.version}</version>
+ <scope>provided</scope>
+ </dependency>
<dependency>
<groupId>io.glutenproject</groupId>
<artifactId>protobuf-java</artifactId>
diff --git a/gluten-arrow/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala b/gluten-arrow/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
index ec6ac35af3..7c8261c2ad 100644
--- a/gluten-arrow/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
+++ b/gluten-arrow/src/main/scala/org/apache/spark/sql/utils/SparkArrowUtil.scala
@@ -16,6 +16,8 @@
*/
package org.apache.spark.sql.utils
+import org.apache.gluten.expression.ConverterUtils
+
import org.apache.spark.sql.types._
import org.apache.arrow.vector.complex.MapVector
@@ -92,9 +94,16 @@ object SparkArrowUtil {
name,
fieldType,
fields
- .map(field => toArrowField(field.name, field.dataType, field.nullable, timeZoneId))
+ .map(
+ field =>
+ toArrowField(
+ ConverterUtils.normalizeStructFieldName(field.name),
+ field.dataType,
+ field.nullable,
+ timeZoneId))
.toSeq
- .asJava)
+ .asJava
+ )
case MapType(keyType, valueType, valueContainsNull) =>
val mapType = new FieldType(nullable, new ArrowType.Map(false), null)
// Note: Map Type struct can not be null, Struct Type key field can not be null
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
index 3a2823ffb7..7d07431a87 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendSettingsApi.scala
@@ -68,6 +68,8 @@ trait BackendSettingsApi {
}
def supportStructType(): Boolean = false
+ def structFieldToLowerCase(): Boolean = true
+
// Whether to fallback aggregate at the same time if its empty-output child is fallen back.
def fallbackAggregateWithEmptyOutputChild(): Boolean = false
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
index 4b929e5251..730f96aad3 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
@@ -54,6 +54,14 @@ object ConverterUtils extends Logging {
if (caseSensitive) name else name.toLowerCase(Locale.ROOT)
}
+ def normalizeStructFieldName(name: String): String = {
+ if (BackendsApiManager.getSettings.structFieldToLowerCase()) {
+ normalizeColName(name)
+ } else {
+ name
+ }
+ }
+
def getShortAttributeName(attr: Attribute): String = {
val name = normalizeColName(attr.name)
val subIndex = name.indexOf("(")
@@ -259,7 +267,7 @@ object ConverterUtils extends Logging {
val fieldNames = new JArrayList[String]
for (structField <- s.fields) {
fieldNodes.add(getTypeNode(structField.dataType, structField.nullable))
- fieldNames.add(structField.name)
+ fieldNames.add(normalizeStructFieldName(structField.name))
}
TypeBuilder.makeStruct(nullable, fieldNodes, fieldNames)
case _: NullType =>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]