This is an automated email from the ASF dual-hosted git repository.
chengchengjin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 37ab260cec [GLUTEN-9335][VL] Enable tests ignored because of lack of metadata and enable spark35 CI (#10496)
37ab260cec is described below
commit 37ab260cec30ea5d5ef5e6d5c3496dedb77c7583
Author: Jin Chengcheng <[email protected]>
AuthorDate: Thu Aug 21 11:42:29 2025 +0100
[GLUTEN-9335][VL] Enable tests ignored because of lack of metadata and enable spark35 CI (#10496)
---
.../workflows/velox_backend_enhanced_features.yml | 106 ++++++++++++++++++++-
.../extensions/TestGlutenMergeOnReadDelete.java | 5 -
.../source/TestGlutenIcebergSourceHiveTables.java | 21 +---
.../gluten/sql/TestGlutenAggregatePushDown.java | 96 +++++++++----------
4 files changed, 152 insertions(+), 76 deletions(-)
diff --git a/.github/workflows/velox_backend_enhanced_features.yml
b/.github/workflows/velox_backend_enhanced_features.yml
index 5ba5c785ec..10ab11b64e 100644
--- a/.github/workflows/velox_backend_enhanced_features.yml
+++ b/.github/workflows/velox_backend_enhanced_features.yml
@@ -149,4 +149,108 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-golden-files
- path: /tmp/tpch-approved-plan/**
\ No newline at end of file
+ path: /tmp/tpch-approved-plan/**
+
+ spark-test-spark35:
+ needs: build-native-lib-centos-7
+ runs-on: ubuntu-22.04
+ container: apache/gluten:centos-8-jdk8
+ steps:
+ - uses: actions/checkout@v2
+ - name: Download All Artifacts
+ uses: actions/download-artifact@v4
+ with:
+ name: velox-native-lib-enhanced-centos-7-${{github.sha}}
+ path: ./cpp/build/releases
+ - name: Download Arrow Jars
+ uses: actions/download-artifact@v4
+ with:
+ name: arrow-jars-enhanced-centos-7-${{github.sha}}
+ path: /root/.m2/repository/org/apache/arrow/
+ - name: Prepare spark.test.home for Spark 3.5.5 (other tests)
+ run: |
+ dnf module -y install python39 && \
+ alternatives --set python3 /usr/bin/python3.9 && \
+ pip3 install setuptools==77.0.3 && \
+ pip3 install pyspark==3.5.5 cython && \
+ pip3 install pandas==2.2.3 pyarrow==20.0.0
+ - name: Build and Run unit test for Spark 3.5.5 (other tests)
+ run: |
+ cd $GITHUB_WORKSPACE/
+ export SPARK_SCALA_VERSION=2.12
+ yum install -y java-17-openjdk-devel
+ export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+ export PATH=$JAVA_HOME/bin:$PATH
+ java -version
+ export SPARK_HOME=/opt/shims/spark35/spark_home/
+ ls -l $SPARK_HOME
+ $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi \
+ -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTest \
+ -DargLine="-Dspark.test.home=$SPARK_HOME"
+ - name: Upload test report
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: ${{ github.job }}-report
+ path: '**/surefire-reports/TEST-*.xml'
+ - name: Upload unit tests log files
+ if: ${{ !success() }}
+ uses: actions/upload-artifact@v4
+ with:
+ name: ${{ github.job }}-test-log
+ path: |
+ **/target/*.log
+ **/gluten-ut/**/hs_err_*.log
+ **/gluten-ut/**/core.*
+ - name: Upload golden files
+ if: failure()
+ uses: actions/upload-artifact@v4
+ with:
+ name: ${{ github.job }}-golden-files
+ path: /tmp/tpch-approved-plan/**
+
+ spark-test-spark35-slow:
+ needs: build-native-lib-centos-7
+ runs-on: ubuntu-22.04
+ container: apache/gluten:centos-8-jdk8
+ steps:
+ - uses: actions/checkout@v2
+ - name: Download All Artifacts
+ uses: actions/download-artifact@v4
+ with:
+ name: velox-native-lib-enhanced-centos-7-${{github.sha}}
+ path: ./cpp/build/releases
+ - name: Download Arrow Jars
+ uses: actions/download-artifact@v4
+ with:
+ name: arrow-jars-enhanced-centos-7-${{github.sha}}
+ path: /root/.m2/repository/org/apache/arrow/
+ - name: Prepare Spark Resources for Spark 3.5.5
+ run: |
+ rm -rf /opt/shims/spark35
+ bash .github/workflows/util/install_spark_resources.sh 3.5
+ - name: Build and Run unit test for Spark 3.5.5 (slow tests)
+ run: |
+ cd $GITHUB_WORKSPACE/
+ yum install -y java-17-openjdk-devel
+ export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
+ export PATH=$JAVA_HOME/bin:$PATH
+ java -version
+ $MVN_CMD clean test -Pspark-3.5 -Pjava-17 -Pbackends-velox -Piceberg -Pdelta -Phudi -Ppaimon -Pspark-ut \
+ -DargLine="-Dspark.test.home=/opt/shims/spark35/spark_home/" \
+ -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
+ - name: Upload test report
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: ${{ github.job }}-report
+ path: '**/surefire-reports/TEST-*.xml'
+ - name: Upload unit tests log files
+ if: ${{ !success() }}
+ uses: actions/upload-artifact@v4
+ with:
+ name: ${{ github.job }}-test-log
+ path: |
+ **/target/*.log
+ **/gluten-ut/**/hs_err_*.log
+ **/gluten-ut/**/core.*
diff --git
a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadDelete.java
b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadDelete.java
index 7133ae2748..f2fe3e3341 100644
---
a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadDelete.java
+++
b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/extensions/TestGlutenMergeOnReadDelete.java
@@ -60,9 +60,4 @@ public class TestGlutenMergeOnReadDelete extends
TestMergeOnReadDelete {
public synchronized void testDeleteWithSnapshotIsolation() throws
ExecutionException {
System.out.println("Run timeout");
}
-
- @Test
- public void testDeleteFileThenMetadataDelete() {
- System.out.println("Does not support metadata deletion");
- }
}
diff --git
a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIcebergSourceHiveTables.java
b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIcebergSourceHiveTables.java
index 3e75ecbc50..c3e921e324 100644
---
a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIcebergSourceHiveTables.java
+++
b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/source/TestGlutenIcebergSourceHiveTables.java
@@ -16,26 +16,7 @@
*/
package org.apache.gluten.source;
-import org.apache.iceberg.PartitionSpec;
-import org.apache.iceberg.Schema;
-import org.apache.iceberg.catalog.TableIdentifier;
-import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.spark.source.TestIcebergSourceHiveTables;
-import org.apache.iceberg.types.Types;
-import org.junit.Test;
-
-import static org.apache.iceberg.types.Types.NestedField.optional;
// Fall back all the table scans because the source table is a metadata table in Avro format.
-public class TestGlutenIcebergSourceHiveTables extends
TestIcebergSourceHiveTables {
- private static final Schema SCHEMA =
- new Schema(
- optional(1, "id", Types.IntegerType.get()), optional(2, "data",
Types.StringType.get()));
-
- @Test
- public void testAllEntriesTable() {
- TableIdentifier tableIdentifier = TableIdentifier.of("db", "entries_test");
- createTable(tableIdentifier, SCHEMA, PartitionSpec.unpartitioned(),
ImmutableMap.of());
- System.out.println("Ignore because lack metadata");
- }
-}
+public class TestGlutenIcebergSourceHiveTables extends TestIcebergSourceHiveTables {}
diff --git
a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenAggregatePushDown.java
b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenAggregatePushDown.java
index a62b4f21f2..17a578bad8 100644
---
a/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenAggregatePushDown.java
+++
b/backends-velox/src-iceberg-spark34/test/java/org/apache/gluten/sql/TestGlutenAggregatePushDown.java
@@ -16,54 +16,50 @@
*/
package org.apache.gluten.sql;
-// import org.apache.gluten.TestConfUtil;
-//
-// import org.apache.iceberg.CatalogUtil;
-// import org.apache.iceberg.catalog.Namespace;
-// import org.apache.iceberg.exceptions.AlreadyExistsException;
-// import org.apache.iceberg.hive.HiveCatalog;
-// import org.apache.iceberg.hive.TestHiveMetastore;
-// import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
-// import org.apache.iceberg.spark.SparkTestBase;
-// import org.apache.iceberg.spark.sql.TestAggregatePushDown;
-// import org.apache.spark.sql.SparkSession;
-// import org.junit.BeforeClass;
-// import org.junit.Ignore;
-//
-// import java.util.Map;
+import org.apache.gluten.TestConfUtil;
-// The aggregate push down is described in
https://github.com/apache/iceberg/pull/6252, which uses
-// statistic to get the result by LocalTableScan, Now stats is not supported.
-// @Ignore
-// public class TestGlutenAggregatePushDown extends TestAggregatePushDown {
-// public TestGlutenAggregatePushDown(
-// String catalogName, String implementation, Map<String, String> config)
{
-// super(catalogName, implementation, config);
-// }
-//
-// @BeforeClass
-// public static void startMetastoreAndSpark() {
-// SparkTestBase.metastore = new TestHiveMetastore();
-// metastore.start();
-// SparkTestBase.hiveConf = metastore.hiveConf();
-//
-// SparkTestBase.spark =
-// SparkSession.builder()
-// .master("local[2]")
-// .config("spark.sql.iceberg.aggregate_pushdown", "true")
-// .config(TestConfUtil.GLUTEN_CONF)
-// .enableHiveSupport()
-// .getOrCreate();
-//
-// SparkTestBase.catalog =
-// (HiveCatalog)
-// CatalogUtil.loadCatalog(
-// HiveCatalog.class.getName(), "hive", ImmutableMap.of(),
hiveConf);
-//
-// try {
-// catalog.createNamespace(Namespace.of("default"));
-// } catch (AlreadyExistsException ignored) {
-// // the default namespace already exists. ignore the create error
-// }
-// }
-// }
+import org.apache.iceberg.CatalogUtil;
+import org.apache.iceberg.catalog.Namespace;
+import org.apache.iceberg.exceptions.AlreadyExistsException;
+import org.apache.iceberg.hive.HiveCatalog;
+import org.apache.iceberg.hive.TestHiveMetastore;
+import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
+import org.apache.iceberg.spark.SparkTestBase;
+import org.apache.iceberg.spark.sql.TestAggregatePushDown;
+import org.apache.spark.sql.SparkSession;
+import org.junit.BeforeClass;
+
+import java.util.Map;
+
+public class TestGlutenAggregatePushDown extends TestAggregatePushDown {
+ public TestGlutenAggregatePushDown(
+ String catalogName, String implementation, Map<String, String> config) {
+ super(catalogName, implementation, config);
+ }
+
+ @BeforeClass
+ public static void startMetastoreAndSpark() {
+ SparkTestBase.metastore = new TestHiveMetastore();
+ metastore.start();
+ SparkTestBase.hiveConf = metastore.hiveConf();
+
+ SparkTestBase.spark =
+ SparkSession.builder()
+ .master("local[2]")
+ .config("spark.sql.iceberg.aggregate_pushdown", "true")
+ .config(TestConfUtil.GLUTEN_CONF)
+ .enableHiveSupport()
+ .getOrCreate();
+
+ SparkTestBase.catalog =
+ (HiveCatalog)
+ CatalogUtil.loadCatalog(
+ HiveCatalog.class.getName(), "hive", ImmutableMap.of(),
hiveConf);
+
+ try {
+ catalog.createNamespace(Namespace.of("default"));
+ } catch (AlreadyExistsException ignored) {
+ // the default namespace already exists. ignore the create error
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]