This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 2990280dbd [GLUTEN-11683][VL] Enable GlutenParquetTypeWideningSuite for Spark 4.0 and 4.1 (#11684)
2990280dbd is described below
commit 2990280dbd37b34306edefea08f9248edda0dc31
Author: Chang Chen <[email protected]>
AuthorDate: Sun Mar 8 18:14:34 2026 +0800
[GLUTEN-11683][VL] Enable GlutenParquetTypeWideningSuite for Spark 4.0 and 4.1 (#11684)
* Update builddep-veloxbe-inc.sh to build specific CMake targets
Co-authored-by: Copilot <[email protected]>
* Enable GlutenParquetTypeWideningSuite for Spark 4.0 and 4.1
Add translateException() to ClosableIterator as a virtual hook for
backend-specific exception translation. Override in ColumnarBatchOutIterator
to translate Velox type conversion errors into Spark's
SchemaColumnConvertNotSupportedException.
Exclude 63 tests: Velox native reader always rejects incompatible type
conversions (no parquet-mr fallback), and does not support
DELTA_BYTE_ARRAY encoding for FIXED_LEN_BYTE_ARRAY decimals.
Test results: 21 pass / 63 ignored (spark40 and spark41).
Co-authored-by: Copilot <[email protected]>
---------
Co-authored-by: Copilot <[email protected]>
---
dev/builddep-veloxbe-inc.sh | 4 +-
.../vectorized/ColumnarBatchOutIterator.java | 37 ++++++++++++
.../apache/gluten/iterator/ClosableIterator.java | 12 +++-
.../gluten/utils/velox/VeloxTestSettings.scala | 65 +++++++++++++++++++++-
.../gluten/utils/velox/VeloxTestSettings.scala | 65 +++++++++++++++++++++-
5 files changed, 177 insertions(+), 6 deletions(-)
diff --git a/dev/builddep-veloxbe-inc.sh b/dev/builddep-veloxbe-inc.sh
index 9fafdf3277..5fff1a3e76 100755
--- a/dev/builddep-veloxbe-inc.sh
+++ b/dev/builddep-veloxbe-inc.sh
@@ -147,12 +147,12 @@ fi
# Step 2: Build Velox
step 2 "Building Velox (incremental)"
-cmake --build "$VELOX_BUILD_DIR" -j $NUM_THREADS
+cmake --build "$VELOX_BUILD_DIR" --target velox -j $NUM_THREADS
echo "[Step 2/4] Velox build complete."
# Step 3: Build Gluten C++
step 3 "Building Gluten C++ (incremental)"
-cmake --build "$GLUTEN_BUILD_DIR" -j $NUM_THREADS
+cmake --build "$GLUTEN_BUILD_DIR" --target gluten velox -j $NUM_THREADS
echo "[Step 3/4] Gluten C++ build complete."
# Step 4: Copy libraries
diff --git
a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
index 27162a800f..d251371841 100644
---
a/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
+++
b/gluten-arrow/src/main/java/org/apache/gluten/vectorized/ColumnarBatchOutIterator.java
@@ -17,10 +17,12 @@
package org.apache.gluten.vectorized;
import org.apache.gluten.columnarbatch.ColumnarBatches;
+import org.apache.gluten.exception.GlutenException;
import org.apache.gluten.iterator.ClosableIterator;
import org.apache.gluten.runtime.Runtime;
import org.apache.gluten.runtime.RuntimeAware;
+import org.apache.spark.sql.execution.datasources.SchemaColumnConvertNotSupportedException;
import org.apache.spark.sql.vectorized.ColumnarBatch;
import java.io.IOException;
@@ -130,6 +132,41 @@ public class ColumnarBatchOutIterator extends ClosableIterator<ColumnarBatch>
nativeRequestBarrier(iterHandle);
}
+ /**
+ * Translates a Velox type conversion error into a SchemaColumnConvertNotSupportedException.
+ * Returns null if the message does not indicate a type conversion error.
+ */
+ private static RuntimeException translateToSchemaException(String msg) {
+ if (msg.contains("not allowed for requested type") || msg.contains("Not a valid type for")) {
+ return new SchemaColumnConvertNotSupportedException("unknown", msg, "unknown");
+ }
+ return null;
+ }
+
+ @Override
+ protected RuntimeException translateException(Exception e) {
+ String msg = findFirstNonNullMessage(e);
+ if (msg != null) {
+ RuntimeException schemaEx = translateToSchemaException(msg);
+ if (schemaEx != null) {
+ schemaEx.initCause(e);
+ return schemaEx;
+ }
+ }
+ return new GlutenException(e);
+ }
+
+ private static String findFirstNonNullMessage(Throwable t) {
+ while (t != null) {
+ String msg = t.getMessage();
+ if (msg != null) {
+ return msg;
+ }
+ t = t.getCause();
+ }
+ return null;
+ }
+
@Override
public void close0() {
// To make sure the outputted batches are still accessible after the iterator is closed.
diff --git
a/gluten-core/src/main/java/org/apache/gluten/iterator/ClosableIterator.java
b/gluten-core/src/main/java/org/apache/gluten/iterator/ClosableIterator.java
index 7947b09af9..38764ec024 100644
--- a/gluten-core/src/main/java/org/apache/gluten/iterator/ClosableIterator.java
+++ b/gluten-core/src/main/java/org/apache/gluten/iterator/ClosableIterator.java
@@ -35,7 +35,7 @@ public abstract class ClosableIterator<T> implements
AutoCloseable, Serializable
try {
return hasNext0();
} catch (Exception e) {
- throw new GlutenException(e);
+ throw translateException(e);
}
}
@@ -47,7 +47,7 @@ public abstract class ClosableIterator<T> implements
AutoCloseable, Serializable
try {
return next0();
} catch (Exception e) {
- throw new GlutenException(e);
+ throw translateException(e);
}
}
@@ -63,4 +63,12 @@ public abstract class ClosableIterator<T> implements
AutoCloseable, Serializable
protected abstract boolean hasNext0() throws Exception;
protected abstract T next0() throws Exception;
+
+ /**
+ * Translates a native exception into an appropriate Java exception. Subclasses can override this
+ * to translate backend-specific exceptions into Spark-compatible exceptions.
+ */
+ protected RuntimeException translateException(Exception e) {
+ return new GlutenException(e);
+ }
}
diff --git
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 399661654f..4f7c67daaa 100644
---
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -322,7 +322,70 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetAvroCompatibilitySuite]
enableSuite[GlutenParquetCommitterSuite]
enableSuite[GlutenParquetFieldIdSchemaSuite]
- // TODO: 4.x enableSuite[GlutenParquetTypeWideningSuite] // 74 failures -
MAJOR ISSUE
+ enableSuite[GlutenParquetTypeWideningSuite]
+ .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(22,
2)")
+ .exclude("parquet decimal precision and scale change Decimal(20, 7) ->
Decimal(22, 5)")
+ .exclude("parquet decimal precision and scale change Decimal(20, 5) ->
Decimal(22, 8)")
+ .exclude("parquet decimal precision and scale change Decimal(20, 2) ->
Decimal(22, 4)")
+ .exclude("parquet decimal precision and scale change Decimal(10, 4) ->
Decimal(12, 7)")
+ .exclude("parquet decimal precision and scale change Decimal(10, 6) ->
Decimal(12, 4)")
+ .exclude("parquet decimal precision and scale change Decimal(10, 7) ->
Decimal(5, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(12, 4) ->
Decimal(10, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(12, 4) ->
Decimal(10, 6)")
+ .exclude("parquet decimal precision and scale change Decimal(20, 17) ->
Decimal(10, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(20, 17) ->
Decimal(5, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(22, 4) ->
Decimal(20, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(22, 5) ->
Decimal(20, 7)")
+ .exclude("parquet decimal precision and scale change Decimal(5, 2) ->
Decimal(6, 4)")
+ .exclude("parquet decimal precision and scale change Decimal(7, 4) ->
Decimal(5, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(10, 2) ->
Decimal(12, 4)")
+ .exclude("parquet decimal precision and scale change Decimal(10, 2) ->
Decimal(20, 12)")
+ .exclude("parquet decimal precision and scale change Decimal(5, 2) ->
Decimal(10, 7)")
+ .exclude("parquet decimal precision and scale change Decimal(5, 2) ->
Decimal(20, 17)")
+ .exclude("parquet decimal precision and scale change Decimal(5, 2) ->
Decimal(7, 4)")
+ .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(5,
2)")
+ .exclude("parquet decimal precision change Decimal(12, 2) -> Decimal(10,
2)")
+ .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(10,
2)")
+ .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(5,
2)")
+ .exclude("parquet decimal precision change Decimal(22, 2) -> Decimal(20,
2)")
+ .exclude("parquet decimal precision change Decimal(7, 2) -> Decimal(5, 2)")
+ .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(12,
2)")
+ .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(20,
2)")
+ .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(10,
2)")
+ .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(20,
2)")
+ .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(7, 2)")
+ .exclude("parquet decimal type change Decimal(5, 2) -> Decimal(3, 2)
overflows with parquet-mr")
+ .exclude("unsupported parquet conversion ByteType -> DecimalType(1,0)")
+ .exclude("unsupported parquet conversion ByteType -> DecimalType(2,0)")
+ .exclude("unsupported parquet conversion ByteType -> DecimalType(3,0)")
+ .exclude("unsupported parquet conversion ByteType -> DecimalType(3,1)")
+ .exclude("unsupported parquet conversion ByteType -> DecimalType(4,1)")
+ .exclude("unsupported parquet conversion IntegerType -> DecimalType(10,1)")
+ .exclude("unsupported parquet conversion IntegerType -> DecimalType(5,0)")
+ .exclude("unsupported parquet conversion IntegerType -> DecimalType(9,0)")
+ .exclude("unsupported parquet conversion LongType -> DateType")
+ .exclude("unsupported parquet conversion LongType -> DecimalType(10,0)")
+ .exclude("unsupported parquet conversion LongType -> DecimalType(19,0)")
+ .exclude("unsupported parquet conversion LongType -> DecimalType(20,1)")
+ .exclude("unsupported parquet conversion LongType -> IntegerType")
+ .exclude("unsupported parquet conversion ShortType -> DecimalType(3,0)")
+ .exclude("unsupported parquet conversion ShortType -> DecimalType(4,0)")
+ .exclude("unsupported parquet conversion ShortType -> DecimalType(5,0)")
+ .exclude("unsupported parquet conversion ShortType -> DecimalType(5,1)")
+ .exclude("unsupported parquet conversion ShortType -> DecimalType(6,1)")
+ .exclude("parquet widening conversion ByteType -> DecimalType(11,1)")
+ .exclude("parquet widening conversion ByteType -> DecimalType(20,0)")
+ .exclude("parquet widening conversion IntegerType -> DecimalType(11,1)")
+ .exclude("parquet widening conversion IntegerType -> DecimalType(20,0)")
+ .exclude("parquet widening conversion IntegerType -> DecimalType(38,0)")
+ .exclude("parquet widening conversion IntegerType -> DoubleType")
+ .exclude("parquet widening conversion LongType -> DecimalType(20,0)")
+ .exclude("parquet widening conversion LongType -> DecimalType(21,1)")
+ .exclude("parquet widening conversion LongType -> DecimalType(38,0)")
+ .exclude("parquet widening conversion ShortType -> DecimalType(11,1)")
+ .exclude("parquet widening conversion ShortType -> DecimalType(20,0)")
+ .exclude("parquet widening conversion ShortType -> DecimalType(38,0)")
+ .exclude("parquet widening conversion ShortType -> DoubleType")
enableSuite[GlutenParquetVariantShreddingSuite]
// Generated suites for org.apache.spark.sql.execution.datasources.text
// TODO: 4.x enableSuite[GlutenWholeTextFileV1Suite] // 1 failure
diff --git
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 10802c8892..0dadfa1d0b 100644
---
a/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark41/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -333,7 +333,70 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetAvroCompatibilitySuite]
enableSuite[GlutenParquetCommitterSuite]
enableSuite[GlutenParquetFieldIdSchemaSuite]
- // TODO: 4.x enableSuite[GlutenParquetTypeWideningSuite] // 74 failures -
MAJOR ISSUE
+ enableSuite[GlutenParquetTypeWideningSuite]
+ .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(22,
2)")
+ .exclude("parquet decimal precision and scale change Decimal(20, 7) ->
Decimal(22, 5)")
+ .exclude("parquet decimal precision and scale change Decimal(20, 5) ->
Decimal(22, 8)")
+ .exclude("parquet decimal precision and scale change Decimal(20, 2) ->
Decimal(22, 4)")
+ .exclude("parquet decimal precision and scale change Decimal(10, 4) ->
Decimal(12, 7)")
+ .exclude("parquet decimal precision and scale change Decimal(10, 6) ->
Decimal(12, 4)")
+ .exclude("parquet decimal precision and scale change Decimal(10, 7) ->
Decimal(5, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(12, 4) ->
Decimal(10, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(12, 4) ->
Decimal(10, 6)")
+ .exclude("parquet decimal precision and scale change Decimal(20, 17) ->
Decimal(10, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(20, 17) ->
Decimal(5, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(22, 4) ->
Decimal(20, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(22, 5) ->
Decimal(20, 7)")
+ .exclude("parquet decimal precision and scale change Decimal(5, 2) ->
Decimal(6, 4)")
+ .exclude("parquet decimal precision and scale change Decimal(7, 4) ->
Decimal(5, 2)")
+ .exclude("parquet decimal precision and scale change Decimal(10, 2) ->
Decimal(12, 4)")
+ .exclude("parquet decimal precision and scale change Decimal(10, 2) ->
Decimal(20, 12)")
+ .exclude("parquet decimal precision and scale change Decimal(5, 2) ->
Decimal(10, 7)")
+ .exclude("parquet decimal precision and scale change Decimal(5, 2) ->
Decimal(20, 17)")
+ .exclude("parquet decimal precision and scale change Decimal(5, 2) ->
Decimal(7, 4)")
+ .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(5,
2)")
+ .exclude("parquet decimal precision change Decimal(12, 2) -> Decimal(10,
2)")
+ .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(10,
2)")
+ .exclude("parquet decimal precision change Decimal(20, 2) -> Decimal(5,
2)")
+ .exclude("parquet decimal precision change Decimal(22, 2) -> Decimal(20,
2)")
+ .exclude("parquet decimal precision change Decimal(7, 2) -> Decimal(5, 2)")
+ .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(12,
2)")
+ .exclude("parquet decimal precision change Decimal(10, 2) -> Decimal(20,
2)")
+ .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(10,
2)")
+ .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(20,
2)")
+ .exclude("parquet decimal precision change Decimal(5, 2) -> Decimal(7, 2)")
+ .exclude("parquet decimal type change Decimal(5, 2) -> Decimal(3, 2)
overflows with parquet-mr")
+ .exclude("unsupported parquet conversion ByteType -> DecimalType(1,0)")
+ .exclude("unsupported parquet conversion ByteType -> DecimalType(2,0)")
+ .exclude("unsupported parquet conversion ByteType -> DecimalType(3,0)")
+ .exclude("unsupported parquet conversion ByteType -> DecimalType(3,1)")
+ .exclude("unsupported parquet conversion ByteType -> DecimalType(4,1)")
+ .exclude("unsupported parquet conversion IntegerType -> DecimalType(10,1)")
+ .exclude("unsupported parquet conversion IntegerType -> DecimalType(5,0)")
+ .exclude("unsupported parquet conversion IntegerType -> DecimalType(9,0)")
+ .exclude("unsupported parquet conversion LongType -> DateType")
+ .exclude("unsupported parquet conversion LongType -> DecimalType(10,0)")
+ .exclude("unsupported parquet conversion LongType -> DecimalType(19,0)")
+ .exclude("unsupported parquet conversion LongType -> DecimalType(20,1)")
+ .exclude("unsupported parquet conversion LongType -> IntegerType")
+ .exclude("unsupported parquet conversion ShortType -> DecimalType(3,0)")
+ .exclude("unsupported parquet conversion ShortType -> DecimalType(4,0)")
+ .exclude("unsupported parquet conversion ShortType -> DecimalType(5,0)")
+ .exclude("unsupported parquet conversion ShortType -> DecimalType(5,1)")
+ .exclude("unsupported parquet conversion ShortType -> DecimalType(6,1)")
+ .exclude("parquet widening conversion ByteType -> DecimalType(11,1)")
+ .exclude("parquet widening conversion ByteType -> DecimalType(20,0)")
+ .exclude("parquet widening conversion IntegerType -> DecimalType(11,1)")
+ .exclude("parquet widening conversion IntegerType -> DecimalType(20,0)")
+ .exclude("parquet widening conversion IntegerType -> DecimalType(38,0)")
+ .exclude("parquet widening conversion IntegerType -> DoubleType")
+ .exclude("parquet widening conversion LongType -> DecimalType(20,0)")
+ .exclude("parquet widening conversion LongType -> DecimalType(21,1)")
+ .exclude("parquet widening conversion LongType -> DecimalType(38,0)")
+ .exclude("parquet widening conversion ShortType -> DecimalType(11,1)")
+ .exclude("parquet widening conversion ShortType -> DecimalType(20,0)")
+ .exclude("parquet widening conversion ShortType -> DecimalType(38,0)")
+ .exclude("parquet widening conversion ShortType -> DoubleType")
// TODO: 4.x enableSuite[GlutenParquetVariantShreddingSuite] // 1 failure
// Generated suites for org.apache.spark.sql.execution.datasources.text
// TODO: 4.x enableSuite[GlutenWholeTextFileV1Suite] // 1 failure
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]