This is an automated email from the ASF dual-hosted git repository.

chengchengjin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new a83acfb84e [GLUTEN-8969][VL] feat: Enable iceberg test 
TestPartitionValues (#10910)
a83acfb84e is described below

commit a83acfb84eef08fc57f3edd5b58b86e6deaa00eb
Author: Jin Chengcheng <[email protected]>
AuthorDate: Tue Oct 21 11:39:27 2025 +0100

    [GLUTEN-8969][VL] feat: Enable iceberg test TestPartitionValues (#10910)
    
    Before that:
    
    2025-10-19T06:32:33.7982447Z 06:32:33.786 WARN 
org.apache.spark.sql.execution.GlutenFallbackReporter: Validation failed for 
plan: BatchScan /tmp/junit9070726647653089838/source_table[QueryId=25582], due 
to:
    2025-10-19T06:32:33.7986073Z  - Found schema check failure for 
StructType(StructField(nested,StructType(StructField(id,LongType,false),StructField(data,StringType,false),StructField(b,BooleanType,false),StructField(i,IntegerType,false),StructField(l,LongType,false),StructField(f,FloatType,false),StructField(d,DoubleType,false),StructField(date,DateType,false),StructField(ts,TimestampNTZType,false),StructField(s,StringType,false),St...
    Because TimestampNTZType is not supported in the native backend, 
remove it from the test.
    Also fix validation of the UUID and FIXED types inside complex data types by 
checking the element types of complex data types.
    Relax the restriction on using decimal as a partition write type.
    
    Related issue: #8969
---
 .../test/java/org/apache/gluten/source/TestPartitionValues.java   | 8 +++-----
 .../main/scala/org/apache/gluten/execution/IcebergWriteExec.scala | 2 +-
 .../scala/org/apache/iceberg/spark/source/IcebergWriteUtil.scala  | 8 +++++---
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git 
a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestPartitionValues.java
 
b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestPartitionValues.java
index 4fb6de296b..7fd2fc1abb 100644
--- 
a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestPartitionValues.java
+++ 
b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestPartitionValues.java
@@ -52,8 +52,6 @@ import java.util.stream.IntStream;
 import static org.apache.iceberg.types.Types.NestedField.optional;
 import static org.apache.iceberg.types.Types.NestedField.required;
 
-// testPartitionValueTypes failed by Non-whitespace character found after end 
of conversion: ""
-// Change the schema to test orc
 @RunWith(Parameterized.class)
 public class TestPartitionValues extends SparkTestBase {
   @Parameterized.Parameters(name = "format = {0}, vectorized = {1}")
@@ -77,8 +75,8 @@ public class TestPartitionValues extends SparkTestBase {
           required(105, "f", Types.FloatType.get()),
           required(106, "d", Types.DoubleType.get()),
           required(107, "date", Types.DateType.get()),
-          // Change the type to withoutZone because orc throws exception
-          required(108, "ts", Types.TimestampType.withoutZone()),
+          // Change the type to withoutZone because orc throws exception, this 
is TimestampNTZType
+          //          required(108, "ts", Types.TimestampType.withoutZone()),
           required(110, "s", Types.StringType.get()),
           required(113, "bytes", Types.BinaryType.get()),
           required(114, "dec_9_0", Types.DecimalType.of(9, 0)),
@@ -325,7 +323,7 @@ public class TestPartitionValues extends SparkTestBase {
   public void testNestedPartitionValues() throws Exception {
     String[] columnNames =
         new String[] {
-          "b", "i", "l", "f", "d", "date", "ts", "s", "bytes", "dec_9_0", 
"dec_11_2", "dec_38_10"
+          "b", "i", "l", "f", "d", "date", "s", "bytes", "dec_9_0", 
"dec_11_2", "dec_38_10"
         };
 
     HadoopTables tables = new 
HadoopTables(spark.sessionState().newHadoopConf());
diff --git 
a/gluten-iceberg/src/main/scala/org/apache/gluten/execution/IcebergWriteExec.scala
 
b/gluten-iceberg/src/main/scala/org/apache/gluten/execution/IcebergWriteExec.scala
index a3346468df..f06eda3c14 100644
--- 
a/gluten-iceberg/src/main/scala/org/apache/gluten/execution/IcebergWriteExec.scala
+++ 
b/gluten-iceberg/src/main/scala/org/apache/gluten/execution/IcebergWriteExec.scala
@@ -55,7 +55,7 @@ trait IcebergWriteExec extends ColumnarV2TableWriteExec {
 
   private def validatePartitionType(schema: Schema, field: PartitionField): 
Boolean = {
     val partitionType = schema.findType(field.sourceId())
-    val unSupportType = Seq(TypeID.DOUBLE, TypeID.FLOAT, TypeID.DECIMAL)
+    val unSupportType = Seq(TypeID.DOUBLE, TypeID.FLOAT)
     !unSupportType.contains(partitionType.typeId())
   }
 
diff --git 
a/gluten-iceberg/src/main/scala/org/apache/iceberg/spark/source/IcebergWriteUtil.scala
 
b/gluten-iceberg/src/main/scala/org/apache/iceberg/spark/source/IcebergWriteUtil.scala
index 7ebd4cad44..12507af996 100644
--- 
a/gluten-iceberg/src/main/scala/org/apache/iceberg/spark/source/IcebergWriteUtil.scala
+++ 
b/gluten-iceberg/src/main/scala/org/apache/iceberg/spark/source/IcebergWriteUtil.scala
@@ -37,9 +37,11 @@ object IcebergWriteUtil {
 
   private def hasUnsupportedDataType(dataType: Type): Boolean = {
     dataType match {
-      case _: ListType => true
-      case _: MapType => true
-      case _: org.apache.iceberg.types.Types.StructType => true
+      case l: ListType => hasUnsupportedDataType(l.elementType())
+      case m: MapType =>
+        hasUnsupportedDataType(m.keyType()) || 
hasUnsupportedDataType(m.valueType())
+      case s: org.apache.iceberg.types.Types.StructType =>
+        s.fields().stream().anyMatch(f => hasUnsupportedDataType(f.`type`()))
       case t if t.typeId() == TypeID.UUID || t.typeId() == TypeID.FIXED => true
       case _ => false
     }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to