This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new bf820853f [GLUTEN-5309][VL] Enable Spark3.5 UTs with failed ones
excluded (#5342)
bf820853f is described below
commit bf820853f5557511ea06a22fad6cadd438edc0d0
Author: Yan Ma <[email protected]>
AuthorDate: Fri Apr 12 08:36:11 2024 +0800
[GLUTEN-5309][VL] Enable Spark3.5 UTs with failed ones excluded (#5342)
Enable Spark 3.5 UTs in CI/CD with failed ones skipped.
---
.github/workflows/velox_docker.yml | 49 +++++++++++++++++--
.../execution/VeloxAggregateFunctionsSuite.scala | 12 +++--
.../gluten/execution/VeloxColumnarCacheSuite.scala | 3 +-
.../gluten/execution/VeloxHashJoinSuite.scala | 3 +-
.../apache/gluten/expression/VeloxUdfSuite.scala | 6 ++-
.../execution/VeloxParquetWriteForHiveSuite.scala | 12 +++--
.../sql/execution/VeloxParquetWriteSuite.scala | 6 ++-
gluten-delta/pom.xml | 2 +-
.../apache/gluten/execution/VeloxDeltaSuite.scala | 24 +++++++---
.../gluten/execution/VeloxTPCHIcebergSuite.scala | 3 +-
gluten-ut/pom.xml | 6 ++-
gluten-ut/spark35/pom.xml | 14 +++++-
.../gluten/utils/velox/VeloxTestSettings.scala | 55 +++++++++++++++++++---
.../apache/spark/sql/GlutenSQLQueryTestSuite.scala | 6 ++-
.../expressions/GlutenExpressionMappingSuite.scala | 6 ++-
pom.xml | 19 +++++---
16 files changed, 184 insertions(+), 42 deletions(-)
diff --git a/.github/workflows/velox_docker.yml
b/.github/workflows/velox_docker.yml
index 17a67d535..07960813e 100644
--- a/.github/workflows/velox_docker.yml
+++ b/.github/workflows/velox_docker.yml
@@ -746,16 +746,59 @@ jobs:
tar --strip-components=1 -xf v3.5.1.tar.gz
spark-3.5.1/sql/core/src/test/resources/ && \
mkdir -p shims/spark35/spark_home/ && \
mv sql shims/spark35/spark_home/
- - name: Build for Spark 3.5.1 (other tests)
+ - name: Build and Run unit test for Spark 3.5.1 (other tests)
run: |
cd $GITHUB_WORKSPACE/ && \
export SPARK_SCALA_VERSION=2.12 && \
export MAVEN_HOME=/usr/lib/maven
export PATH=${PATH}:${MAVEN_HOME}/bin
- mvn -ntp clean install -Pspark-3.5 -Pbackends-velox -Prss -Piceberg
-Pdelta -Pspark-ut
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/"
-DskipTests
+ mvn -ntp clean install -Pspark-3.5 -Pbackends-velox -Prss -Piceberg
-Pdelta -Pspark-ut
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/"
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.SkipTestTags
&& \
+ mvn -ntp test -Pspark-3.5 -Pbackends-velox -Piceberg -Pdelta
-DtagsToExclude=None -DtagsToInclude=org.apache.gluten.tags.UDFTest
- name: Upload golden files
if: failure()
uses: actions/upload-artifact@v4
with:
name: golden-files-spark35
- path: /tmp/tpch-approved-plan/**
\ No newline at end of file
+ path: /tmp/tpch-approved-plan/**
+
+ run-spark-test-spark35-slow:
+ runs-on: ubuntu-20.04
+ container: ghcr.io/facebookincubator/velox-dev:circleci-avx
+ steps:
+ - uses: actions/checkout@v2
+ - name: Build Gluten velox third party
+ run: |
+ yum install sudo patch java-1.8.0-openjdk-devel wget -y && \
+ cd ep/build-velox/src && \
+ ./get_velox.sh && \
+ source /opt/rh/gcc-toolset-9/enable && \
+ ./build_velox.sh --run_setup_script=OFF --enable_ep_cache=OFF
--build_test_utils=ON
+ - name: Build Gluten CPP library
+ run: |
+ cd $GITHUB_WORKSPACE/cpp && \
+ source /opt/rh/gcc-toolset-9/enable && \
+ ./compile.sh --build_velox_backend=ON --build_protobuf=ON
--build_tests=ON --build_examples=ON --build_benchmarks=ON
+ - name: Setup java and maven
+ run: |
+ wget
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz
+ tar -xvf apache-maven-3.8.8-bin.tar.gz
+ mv apache-maven-3.8.8 /usr/lib/maven
+ - name: Prepare spark.test.home for Spark 3.5.1 (other tests)
+ run: |
+ cd $GITHUB_WORKSPACE/ && \
+ wget
https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz
&& \
+ tar --strip-components=1 -xf spark-3.5.1-bin-hadoop3.tgz
spark-3.5.1-bin-hadoop3/jars/ && \
+ rm -rf spark-3.5.1-bin-hadoop3.tgz && \
+ mkdir -p
$GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
+ mv jars
$GITHUB_WORKSPACE//shims/spark35/spark_home/assembly/target/scala-2.12 && \
+ cd $GITHUB_WORKSPACE// && \
+ wget https://github.com/apache/spark/archive/refs/tags/v3.5.1.tar.gz
&& \
+ tar --strip-components=1 -xf v3.5.1.tar.gz
spark-3.5.1/sql/core/src/test/resources/ && \
+ mkdir -p shims/spark35/spark_home/ && \
+ mv sql shims/spark35/spark_home/
+ - name: Build and Run unit test for Spark 3.5.1 (slow tests)
+ run: |
+ cd $GITHUB_WORKSPACE/
+ export MAVEN_HOME=/usr/lib/maven
+ export PATH=${PATH}:${MAVEN_HOME}/bin
+ mvn -ntp clean install -Pspark-3.5 -Pbackends-velox -Prss -Piceberg
-Pdelta -Pspark-ut
-DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark35/spark_home/"
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
\ No newline at end of file
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
index f78ef3422..2573725a7 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala
@@ -371,7 +371,8 @@ abstract class VeloxAggregateFunctionsSuite extends
VeloxWholeStageTransformerSu
}
}
- testWithSpecifiedSparkVersion("regr_r2", Some("3.3")) {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("regr_r2", Some("3.3"), Some("3.4")) {
runQueryAndCompare("""
|select regr_r2(l_partkey, l_suppkey) from lineitem;
|""".stripMargin) {
@@ -390,7 +391,8 @@ abstract class VeloxAggregateFunctionsSuite extends
VeloxWholeStageTransformerSu
}
}
- testWithSpecifiedSparkVersion("regr_slope", Some("3.4")) {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("regr_slope", Some("3.4"), Some("3.4")) {
runQueryAndCompare("""
|select regr_slope(l_partkey, l_suppkey) from
lineitem;
|""".stripMargin) {
@@ -409,7 +411,8 @@ abstract class VeloxAggregateFunctionsSuite extends
VeloxWholeStageTransformerSu
}
}
- testWithSpecifiedSparkVersion("regr_intercept", Some("3.4")) {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("regr_intercept", Some("3.4"), Some("3.4")) {
runQueryAndCompare("""
|select regr_intercept(l_partkey, l_suppkey) from
lineitem;
|""".stripMargin) {
@@ -428,7 +431,8 @@ abstract class VeloxAggregateFunctionsSuite extends
VeloxWholeStageTransformerSu
}
}
- testWithSpecifiedSparkVersion("regr_sxy", Some("3.4")) {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("regr_sxy", Some("3.4"), Some("3.4")) {
runQueryAndCompare("""
|select regr_sxy(l_partkey, l_suppkey) from lineitem;
|""".stripMargin) {
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxColumnarCacheSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxColumnarCacheSuite.scala
index bfce8653a..33f0f7dfb 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxColumnarCacheSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxColumnarCacheSuite.scala
@@ -76,7 +76,8 @@ class VeloxColumnarCacheSuite extends
VeloxWholeStageTransformerSuite with Adapt
}
}
- test("input row") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("input row", Some("3.2"), Some("3.4")) {
withTable("t") {
sql("CREATE TABLE t USING json AS SELECT * FROM values(1, 'a', (2, 'b'),
(3, 'c'))")
runQueryAndCompare("SELECT * FROM t", cache = true) {
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala
index 3b3b8871a..b0eba7aa2 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala
@@ -67,7 +67,8 @@ class VeloxHashJoinSuite extends
VeloxWholeStageTransformerSuite {
}
}
- test("generate hash join plan - v2") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("generate hash join plan - v2", Some("3.2"),
Some("3.4")) {
withSQLConf(
("spark.sql.autoBroadcastJoinThreshold", "-1"),
("spark.sql.adaptive.enabled", "false"),
diff --git
a/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
b/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
index a1c50f610..fc8d6c0d4 100644
---
a/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/gluten/expression/VeloxUdfSuite.scala
@@ -71,7 +71,8 @@ abstract class VeloxUdfSuite extends GlutenQueryTest with
SQLHelper {
.set("spark.memory.offHeap.size", "1024MB")
}
- test("test udf") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("test udf", Some("3.2"), Some("3.4")) {
val df = spark.sql("""select
| myudf1(1),
| myudf1(1L),
@@ -84,7 +85,8 @@ abstract class VeloxUdfSuite extends GlutenQueryTest with
SQLHelper {
.sameElements(Array(Row(6, 6L, 105, Date.valueOf("2024-03-30")))))
}
- test("test udaf") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("test udaf", Some("3.2"), Some("3.4")) {
val df = spark.sql("""select
| myavg(1),
| myavg(1L),
diff --git
a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteForHiveSuite.scala
b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteForHiveSuite.scala
index b9644d4e8..b830a2461 100644
---
a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteForHiveSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteForHiveSuite.scala
@@ -99,7 +99,11 @@ class VeloxParquetWriteForHiveSuite extends GlutenQueryTest
with SQLTestUtils {
_.getMessage.toString.contains("Use Gluten partition write for hive"))
== native)
}
- test("test hive static partition write table") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion(
+ "test hive static partition write table",
+ Some("3.2"),
+ Some("3.4")) {
withTable("t") {
spark.sql(
"CREATE TABLE t (c int, d long, e long)" +
@@ -137,7 +141,8 @@ class VeloxParquetWriteForHiveSuite extends GlutenQueryTest
with SQLTestUtils {
}
}
- test("test hive write table") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("test hive write table", Some("3.2"),
Some("3.4")) {
withTable("t") {
spark.sql("CREATE TABLE t (c int) STORED AS PARQUET")
withSQLConf("spark.sql.hive.convertMetastoreParquet" -> "false") {
@@ -152,7 +157,8 @@ class VeloxParquetWriteForHiveSuite extends GlutenQueryTest
with SQLTestUtils {
}
}
- test("test hive write dir") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("test hive write dir", Some("3.2"),
Some("3.4")) {
withTempPath {
f =>
// compatible with Spark3.3 and later
diff --git
a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteSuite.scala
b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteSuite.scala
index 21144d4f4..f2506211f 100644
---
a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteSuite.scala
+++
b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteSuite.scala
@@ -51,7 +51,11 @@ class VeloxParquetWriteSuite extends
VeloxWholeStageTransformerSuite {
}
}
- test("test write parquet with compression codec") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion(
+ "test write parquet with compression codec",
+ Some("3.2"),
+ Some("3.4")) {
// compression codec details see `VeloxParquetDatasource.cc`
Seq("snappy", "gzip", "zstd", "lz4", "none", "uncompressed")
.foreach {
diff --git a/gluten-delta/pom.xml b/gluten-delta/pom.xml
index 45d7b3da2..a66005f2b 100755
--- a/gluten-delta/pom.xml
+++ b/gluten-delta/pom.xml
@@ -27,7 +27,7 @@
</dependency>
<dependency>
<groupId>io.delta</groupId>
- <artifactId>delta-core_${scala.binary.version}</artifactId>
+ <artifactId>${delta.package.name}_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
diff --git
a/gluten-delta/src/test/scala/org/apache/gluten/execution/VeloxDeltaSuite.scala
b/gluten-delta/src/test/scala/org/apache/gluten/execution/VeloxDeltaSuite.scala
index 1e0be6ff1..9347f7fa5 100644
---
a/gluten-delta/src/test/scala/org/apache/gluten/execution/VeloxDeltaSuite.scala
+++
b/gluten-delta/src/test/scala/org/apache/gluten/execution/VeloxDeltaSuite.scala
@@ -42,7 +42,8 @@ class VeloxDeltaSuite extends WholeStageTransformerSuite {
}
// IdMapping is supported in Delta 2.2 (related to Spark3.3.1)
- testWithSpecifiedSparkVersion("column mapping mode = id", Some("3.3.1")) {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("column mapping mode = id", Some("3.3.1"),
Some("3.4")) {
withTable("delta_cm1") {
spark.sql(s"""
|create table delta_cm1 (id int, name string) using delta
@@ -62,7 +63,8 @@ class VeloxDeltaSuite extends WholeStageTransformerSuite {
}
// NameMapping is supported in Delta 2.0 (related to Spark3.2.0)
- testWithSpecifiedSparkVersion("column mapping mode = name", Some("3.2.0")) {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("column mapping mode = name", Some("3.2.0"),
Some("3.4")) {
withTable("delta_cm2") {
spark.sql(s"""
|create table delta_cm2 (id int, name string) using delta
@@ -81,7 +83,8 @@ class VeloxDeltaSuite extends WholeStageTransformerSuite {
}
}
- testWithSpecifiedSparkVersion("delta: time travel", Some("3.3.1")) {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("delta: time travel", Some("3.3.1"),
Some("3.4")) {
withTable("delta_tm") {
spark.sql(s"""
|create table delta_tm (id int, name string) using delta
@@ -106,7 +109,8 @@ class VeloxDeltaSuite extends WholeStageTransformerSuite {
}
}
- test("delta: partition filters") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("delta: partition filters", Some("3.2"),
Some("3.4")) {
withTable("delta_pf") {
spark.sql(s"""
|create table delta_pf (id int, name string) using delta
partitioned by (name)
@@ -125,7 +129,11 @@ class VeloxDeltaSuite extends WholeStageTransformerSuite {
}
}
- test("basic test with stats.skipping disabled") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion(
+ "basic test with stats.skipping disabled",
+ Some("3.2"),
+ Some("3.4")) {
withTable("delta_test2") {
withSQLConf("spark.databricks.delta.stats.skipping" -> "false") {
spark.sql(s"""
@@ -145,7 +153,8 @@ class VeloxDeltaSuite extends WholeStageTransformerSuite {
}
}
- test("column mapping with complex type") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("column mapping with complex type",
Some("3.2"), Some("3.4")) {
withTable("t1") {
val simpleNestedSchema = new StructType()
.add("a", StringType, true)
@@ -195,7 +204,8 @@ class VeloxDeltaSuite extends WholeStageTransformerSuite {
}
}
- testWithSpecifiedSparkVersion("deletion vector", Some("3.4.2")) {
+ // Disable for Spark3.5
+ testWithSpecifiedSparkVersion("deletion vector", Some("3.4.2"),
Some("3.4.2")) {
withTempPath {
p =>
import testImplicits._
diff --git
a/gluten-iceberg/src/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala
b/gluten-iceberg/src/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala
index b452e9340..f997693f8 100644
---
a/gluten-iceberg/src/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala
+++
b/gluten-iceberg/src/test/scala/org/apache/gluten/execution/VeloxTPCHIcebergSuite.scala
@@ -41,6 +41,7 @@ class VeloxTPCHIcebergSuite extends VeloxTPCHSuite {
override protected def sparkConf: SparkConf = {
super.sparkConf
+ .set("spark.executor.instances", "1")
.set("spark.executor.memory", "4g")
.set(
"spark.sql.extensions",
@@ -106,7 +107,7 @@ class VeloxPartitionedTableTPCHIcebergSuite extends
VeloxTPCHIcebergSuite {
val tableDF = spark.read.format(fileFormat).load(tablePath)
tableDF
- .repartition(50)
+ .repartition(800)
.write
.format("iceberg")
.partitionBy(table.partitionColumns: _*)
diff --git a/gluten-ut/pom.xml b/gluten-ut/pom.xml
index ace5fd43b..8015b5cec 100644
--- a/gluten-ut/pom.xml
+++ b/gluten-ut/pom.xml
@@ -84,7 +84,7 @@
</dependency>
<dependency>
<groupId>io.delta</groupId>
- <artifactId>delta-core_${scala.binary.version}</artifactId>
+ <artifactId>${delta.package.name}_${scala.binary.version}</artifactId>
<scope>provided</scope>
</dependency>
@@ -119,18 +119,22 @@
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
+ <version>${fasterxml.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-annotations</artifactId>
+ <version>${fasterxml.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-core</artifactId>
+ <version>${fasterxml.version}</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.module</groupId>
<artifactId>jackson-module-scala_${scala.binary.version}</artifactId>
+ <version>${fasterxml.version}</version>
</dependency>
<dependency>
<groupId>org.scalatestplus</groupId>
diff --git a/gluten-ut/spark35/pom.xml b/gluten-ut/spark35/pom.xml
index a731ade17..8f0a5605b 100644
--- a/gluten-ut/spark35/pom.xml
+++ b/gluten-ut/spark35/pom.xml
@@ -25,10 +25,22 @@
<dependency>
<groupId>org.apache.parquet</groupId>
<artifactId>parquet-column</artifactId>
- <version>1.12.3</version>
+ <version>1.13.1</version>
<scope>test</scope>
<classifier>tests</classifier>
</dependency>
+ <dependency>
+ <groupId>org.apache.parquet</groupId>
+ <artifactId>parquet-common</artifactId>
+ <version>1.13.1</version>
+ <scope>test</scope>
+ <classifier>tests</classifier>
+ </dependency>
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-core</artifactId>
+ <version>2.15.1</version>
+ </dependency>
</dependencies>
<profiles>
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index c63bd12cf..b33c0780d 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -20,11 +20,11 @@ import org.apache.gluten.utils.{BackendTestSettings,
SQLQueryTestSettings}
import org.apache.spark.sql._
import
org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite,
GlutenBitwiseExpressionsSuite, GlutenCastSuite,
GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite,
GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite,
GlutenDecimalExpressionSuite, GlutenHashExpressionsSuite,
GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite,
GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite,
GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPr [...]
-import
org.apache.spark.sql.connector.{GlutenDataSourceV2DataFrameSessionCatalogSuite,
GlutenDataSourceV2DataFrameSuite, GlutenDataSourceV2FunctionSuite,
GlutenDataSourceV2SQLSessionCatalogSuite, GlutenDataSourceV2SQLSuiteV1Filter,
GlutenDataSourceV2SQLSuiteV2Filter, GlutenDataSourceV2Suite,
GlutenDeleteFromTableSuite, GlutenFileDataSourceV2FallBackSuite,
GlutenKeyGroupedPartitioningSuite, GlutenLocalScanSuite,
GlutenMetadataColumnSuite, GlutenSupportsCatalogOptionsSuite, GlutenTableCapa
[...]
+import
org.apache.spark.sql.connector.{GlutenDataSourceV2DataFrameSessionCatalogSuite,
GlutenDataSourceV2DataFrameSuite, GlutenDataSourceV2FunctionSuite,
GlutenDataSourceV2SQLSessionCatalogSuite, GlutenDataSourceV2SQLSuiteV1Filter,
GlutenDataSourceV2SQLSuiteV2Filter, GlutenDataSourceV2Suite,
GlutenFileDataSourceV2FallBackSuite, GlutenKeyGroupedPartitioningSuite,
GlutenLocalScanSuite, GlutenMetadataColumnSuite,
GlutenSupportsCatalogOptionsSuite, GlutenTableCapabilityCheckSuite, GlutenWrit
[...]
import org.apache.spark.sql.errors.{GlutenQueryCompilationErrorsDSv2Suite,
GlutenQueryCompilationErrorsSuite, GlutenQueryExecutionErrorsSuite,
GlutenQueryParsingErrorsSuite}
import org.apache.spark.sql.execution.{FallbackStrategiesSuite,
GlutenBroadcastExchangeSuite, GlutenCoalesceShufflePartitionsSuite,
GlutenExchangeSuite, GlutenLocalBroadcastExchangeSuite,
GlutenReplaceHashWithSortAggSuite, GlutenReuseExchangeAndSubquerySuite,
GlutenSameResultSuite, GlutenSortSuite, GlutenSQLAggregateFunctionSuite,
GlutenSQLWindowFunctionSuite, GlutenTakeOrderedAndProjectSuite}
import
org.apache.spark.sql.execution.adaptive.velox.VeloxAdaptiveQueryExecSuite
-import org.apache.spark.sql.execution.datasources.{GlutenBucketingUtilsSuite,
GlutenCSVReadSchemaSuite, GlutenDataSourceStrategySuite, GlutenDataSourceSuite,
GlutenFileFormatWriterSuite, GlutenFileIndexSuite,
GlutenFileMetadataStructSuite, GlutenFileSourceStrategySuite,
GlutenHadoopFileLinesReaderSuite, GlutenHeaderCSVReadSchemaSuite,
GlutenJsonReadSchemaSuite, GlutenMergedOrcReadSchemaSuite,
GlutenMergedParquetReadSchemaSuite, GlutenOrcCodecSuite,
GlutenOrcReadSchemaSuite, GlutenOrcV1Ag [...]
+import org.apache.spark.sql.execution.datasources.{GlutenBucketingUtilsSuite,
GlutenCSVReadSchemaSuite, GlutenDataSourceStrategySuite, GlutenDataSourceSuite,
GlutenFileFormatWriterSuite, GlutenFileIndexSuite,
GlutenFileMetadataStructSuite, GlutenFileSourceStrategySuite,
GlutenHadoopFileLinesReaderSuite, GlutenHeaderCSVReadSchemaSuite,
GlutenMergedOrcReadSchemaSuite, GlutenMergedParquetReadSchemaSuite,
GlutenOrcCodecSuite, GlutenOrcReadSchemaSuite,
GlutenOrcV1AggregatePushDownSuite, Glute [...]
import
org.apache.spark.sql.execution.datasources.binaryfile.GlutenBinaryFileFormatSuite
import
org.apache.spark.sql.execution.datasources.csv.{GlutenCSVLegacyTimeParserSuite,
GlutenCSVv1Suite, GlutenCSVv2Suite}
import
org.apache.spark.sql.execution.datasources.exchange.GlutenValidateRequirementsSuite
@@ -56,7 +56,8 @@ class VeloxTestSettings extends BackendTestSettings {
// Rewrite the following tests in GlutenDataSourceV2Suite.
.exclude("partitioning reporting")
.exclude("ordering and partitioning reporting")
- enableSuite[GlutenDeleteFromTableSuite]
+ // Disable for Spark3.5.
+ // enableSuite[GlutenDeleteFromTableSuite]
enableSuite[GlutenFileDataSourceV2FallBackSuite]
// DISABLED: GLUTEN-4893 Vanilla UT checks scan operator by exactly
matching the class type
.exclude("Fallback Parquet V2 to V1")
@@ -123,6 +124,8 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenHashExpressionsSuite]
enableSuite[GlutenIntervalExpressionsSuite]
enableSuite[GlutenJsonFunctionsSuite]
+ // Disable for Spark3.5.
+ .exclude("SPARK-42782: Hive compatibility check for get_json_object")
// Velox does not support single quotes in get_json_object function.
.exclude("function get_json_object - support single quotes")
enableSuite[GlutenLiteralExpressionSuite]
@@ -181,6 +184,7 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("column pruning - non-readable file")
enableSuite[GlutenCSVv1Suite]
enableSuite[GlutenCSVv2Suite]
+ .exclude("Gluten - test for FAILFAST parsing mode")
enableSuite[GlutenCSVLegacyTimeParserSuite]
enableSuite[GlutenJsonV1Suite]
// FIXME: Array direct selection fails
@@ -593,6 +597,8 @@ class VeloxTestSettings extends BackendTestSettings {
// Rewrite by just removing test timestamp.
.exclude("test reading unaligned pages - test all types")
enableSuite[GlutenParquetCompressionCodecPrecedenceSuite]
+ // Disable for Spark3.5.
+ .exclude("Create parquet table with compression")
enableSuite[GlutenParquetDeltaByteArrayEncodingSuite]
enableSuite[GlutenParquetDeltaEncodingInteger]
enableSuite[GlutenParquetDeltaEncodingLong]
@@ -663,6 +669,8 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenParquetV2PartitionDiscoverySuite]
enableSuite[GlutenParquetProtobufCompatibilitySuite]
enableSuite[GlutenParquetV1QuerySuite]
+ // Disable for Spark3.5.
+ .exclude("row group skipping doesn't overflow when reading into larger
type")
// Unsupport spark.sql.files.ignoreCorruptFiles.
.exclude("Enabling/disabling ignoreCorruptFiles")
// decimal failed ut
@@ -678,6 +686,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude(
"SPARK-26677: negated null-safe equality comparison should not filter
matched row groups")
enableSuite[GlutenParquetV2QuerySuite]
+ // Disable for Spark3.5.
+ .exclude("row group skipping doesn't overflow when reading into larger
type")
// Unsupport spark.sql.files.ignoreCorruptFiles.
.exclude("Enabling/disabling ignoreCorruptFiles")
// decimal failed ut
@@ -727,6 +737,10 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenFileFormatWriterSuite]
enableSuite[GlutenFileIndexSuite]
enableSuite[GlutenFileMetadataStructSuite]
+ // Disable as failed in Spark3.5.
+ .exclude("SPARK-41896: Filter on row_index and a stored column at the same
time")
+ .exclude("SPARK-43450: Filter on full _metadata column struct")
+ .exclude("SPARK-43450: Filter on aliased _metadata.row_index")
enableSuite[GlutenParquetV1AggregatePushDownSuite]
enableSuite[GlutenParquetV2AggregatePushDownSuite]
enableSuite[GlutenOrcV1AggregatePushDownSuite]
@@ -745,7 +759,8 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenPruneFileSourcePartitionsSuite]
enableSuite[GlutenCSVReadSchemaSuite]
enableSuite[GlutenHeaderCSVReadSchemaSuite]
- enableSuite[GlutenJsonReadSchemaSuite]
+ // Disable for Spark3.5.
+ // enableSuite[GlutenJsonReadSchemaSuite]
enableSuite[GlutenOrcReadSchemaSuite]
.exclude("append column into middle")
.exclude("hide column in the middle")
@@ -829,6 +844,9 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenBroadcastExchangeSuite]
enableSuite[GlutenLocalBroadcastExchangeSuite]
enableSuite[GlutenCoalesceShufflePartitionsSuite]
+ // Disable for Spark3.5.
+ .exclude("SPARK-46590 adaptive query execution works correctly with
broadcast join and union")
+ .exclude("SPARK-46590 adaptive query execution works correctly with
cartesian join and union")
.excludeByPrefix("determining the number of reducers")
enableSuite[GlutenExchangeSuite]
// ColumnarShuffleExchangeExec does not support doExecute() method
@@ -876,6 +894,11 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("disable bucketing when the output doesn't contain all bucketing
columns")
.excludeByPrefix("bucket coalescing is applied when join expressions
match")
enableSuite[GlutenBucketedWriteWithoutHiveSupportSuite]
+ .exclude("write bucketed data")
+ .exclude("write bucketed data with sortBy")
+ .exclude("write bucketed data without partitionBy")
+ .exclude("write bucketed data without partitionBy with sortBy")
+ .exclude("write bucketed data with bucketing disabled")
enableSuite[GlutenCreateTableAsSelectSuite]
// TODO Gluten can not catch the spark exception in Driver side.
.exclude("CREATE TABLE USING AS SELECT based on the file without write
permission")
@@ -889,6 +912,8 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenFilteredScanSuite]
enableSuite[GlutenFiltersSuite]
enableSuite[GlutenInsertSuite]
+ // Disable for Spark3.5.
+ .excludeByPrefix("Gluten - SPARK-39557")
// the native write staing dir is differnt with vanilla Spark for coustom
partition paths
.exclude("SPARK-35106: Throw exception when rename custom partition paths
returns false")
.exclude("Stop task set if FileAlreadyExistsException was thrown")
@@ -901,6 +926,8 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("SPARK-39557 INSERT INTO statements with tables with struct
defaults")
.exclude("SPARK-39557 INSERT INTO statements with tables with map
defaults")
enableSuite[GlutenPartitionedWriteSuite]
+ // Disable for Spark3.5.
+ .exclude("SPARK-37231, SPARK-37240: Dynamic writes/reads of ANSI interval
partitions")
enableSuite[GlutenPathOptionSuite]
enableSuite[GlutenPrunedScanSuite]
enableSuite[GlutenResolvedDataSourceSuite]
@@ -911,6 +938,8 @@ class VeloxTestSettings extends BackendTestSettings {
// requires resource files from Vanilla spark jar
.exclude("SPARK-32908: maximum target error in percentile_approx")
enableSuite[GlutenCachedTableSuite]
+ // Disable for Spark3.5.
+ .exclude("A cached table preserves the partitioning and ordering of its
cached SparkPlan")
.exclude("InMemoryRelation statistics")
// Extra ColumnarToRow is needed to transform vanilla columnar data to
gluten columnar data.
.exclude("SPARK-37369: Avoid redundant ColumnarToRow transition on
InMemoryTableScan")
@@ -927,6 +956,8 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenCTEInlineSuiteAEOff]
enableSuite[GlutenCTEInlineSuiteAEOn]
enableSuite[GlutenDataFrameAggregateSuite]
+ // Disable for Spark3.5.
+ .exclude("SPARK-43876: Enable fast hashmap for distinct queries")
.exclude(
"zero moments", // [velox does not return NaN]
"SPARK-26021: NaN and -0.0 in grouping expressions", // NaN case
@@ -939,6 +970,10 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenDataFrameAsOfJoinSuite]
enableSuite[GlutenDataFrameComplexTypeSuite]
enableSuite[GlutenDataFrameFunctionsSuite]
+ // Disabled as failed in Spark3.5.
+ .exclude("array_size function")
+ .exclude("cardinality function")
+ .exclude("SPARK-40470: array_zip should return field names in
GetArrayStructFields")
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
.exclude("aggregate function - array for non-primitive type")
@@ -1011,6 +1046,8 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenDataFrameWriterV2Suite]
enableSuite[GlutenDatasetAggregatorSuite]
enableSuite[GlutenDatasetCacheSuite]
+ // Disable for Spark3.5.
+ // .exclude("SPARK-27739 Save stats from optimized plan")
enableSuite[GlutenDatasetOptimizationSuite]
enableSuite[GlutenDatasetPrimitiveSuite]
enableSuite[GlutenDatasetSerializerRegistratorSuite]
@@ -1081,6 +1118,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("Merge runtime bloom filters")
enableSuite[GlutenIntervalFunctionsSuite]
enableSuite[GlutenJoinSuite]
+ // Disable for Spark3.5.
+ .exclude(
+ "SPARK-36612: Support left outer join build left or right outer join
build right in shuffled hash join")
// exclude as it check spark plan
.exclude("SPARK-36794: Ignore duplicated key when building relation for
semi/anti hash join")
// exclude as it check for SMJ node
@@ -1097,8 +1137,6 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenReplaceNullWithFalseInPredicateEndToEndSuite]
enableSuite[GlutenScalaReflectionRelationSuite]
enableSuite[GlutenSerializationSuite]
- // following UT is removed in spark3.3.1
- // enableSuite[GlutenSimpleShowCreateTableSuite]
enableSuite[GlutenFileSourceSQLInsertTestSuite]
enableSuite[GlutenDSV2SQLInsertTestSuite]
enableSuite[GlutenSQLQuerySuite]
@@ -1128,9 +1166,14 @@ class VeloxTestSettings extends BackendTestSettings {
"SPARK-27619: When spark.sql.legacy.allowHashOnMapType is true, hash can
be used on Maptype")
enableSuite[GlutenSQLQueryTestSuite]
enableSuite[GlutenStatisticsCollectionSuite]
+ // Disable for Spark3.5.
+ .exclude("statistics collection of a table with zero column")
// The output byte size of Velox is different
.exclude("SPARK-33687: analyze all tables in a specific database")
enableSuite[GlutenSubquerySuite]
+ // Disable for Spark3.5.
+ .exclude("SPARK-40615: Check unsupported data type when decorrelating
subqueries")
+ .exclude("SPARK-40618: Regression test for merging subquery bug with
nested subqueries")
.excludeByPrefix(
"SPARK-26893" // Rewrite this test because it checks Spark's physical
operators.
)
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index e2bcf2f3a..9f96fa0b0 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -224,7 +224,11 @@ class GlutenSQLQueryTestSuite
"array.sql", // blocked by VELOX-5768
"higher-order-functions.sql", // blocked by VELOX-5768
"udf/udf-window.sql", // Local window fixes are not added.
- "window.sql" // Local window fixes are not added.
+ "window.sql", // Local window fixes are not added.
+ // Disable for Spark 3.5.
+ "group-by.sql",
+ "subquery/scalar-subquery/scalar-subquery-select.sql",
+ "udf/udf-group-by.sql - Scala UDF"
) ++ otherIgnoreList ++ udafIgnoreList
// List of supported cases to run with a certain backend, in lower case.
diff --git
a/gluten-ut/test/src/test/scala/org/apache/gluten/expressions/GlutenExpressionMappingSuite.scala
b/gluten-ut/test/src/test/scala/org/apache/gluten/expressions/GlutenExpressionMappingSuite.scala
index 39e18b0d5..80950e537 100644
---
a/gluten-ut/test/src/test/scala/org/apache/gluten/expressions/GlutenExpressionMappingSuite.scala
+++
b/gluten-ut/test/src/test/scala/org/apache/gluten/expressions/GlutenExpressionMappingSuite.scala
@@ -48,7 +48,8 @@ class GlutenExpressionMappingSuite
conf
}
- test("test expression blacklist") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("test expression blacklist", Some("3.2"),
Some("3.4")) {
val names = ExpressionMappings.expressionsMap.values.toSet
assert(names.contains("regexp_replace"))
assert(names.contains("regexp_extract"))
@@ -69,7 +70,8 @@ class GlutenExpressionMappingSuite
}
}
- test("test blacklisting regexp expressions") {
+ // Disable for Spark3.5.
+ testWithSpecifiedSparkVersion("test blacklisting regexp expressions",
Some("3.2"), Some("3.4")) {
val names = ExpressionMappings.expressionsMap.values.toSet
assert(names.contains("rlike"))
assert(names.contains("regexp_replace"))
diff --git a/pom.xml b/pom.xml
index b65f17314..0707d348d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -136,7 +136,8 @@
<sparkbundle.version>3.2</sparkbundle.version>
<sparkshim.artifactId>spark-sql-columnar-shims-spark32</sparkshim.artifactId>
<spark.version>3.2.2</spark.version>
- <iceberg.version>1.3.1</iceberg.version>
+ <iceberg.version>1.3.1</iceberg.version>
+ <delta.package.name>delta-core</delta.package.name>
<delta.version>2.0.1</delta.version>
<delta.binary.version>20</delta.binary.version>
</properties>
@@ -148,7 +149,8 @@
<sparkshim.artifactId>spark-sql-columnar-shims-spark33</sparkshim.artifactId>
<spark.version>3.3.1</spark.version>
<!-- keep using iceberg v1.3.1 for parquet compatibility. -->
- <iceberg.version>1.3.1</iceberg.version>
+ <iceberg.version>1.3.1</iceberg.version>
+ <delta.package.name>delta-core</delta.package.name>
<delta.version>2.2.0</delta.version>
<delta.binary.version>22</delta.binary.version>
</properties>
@@ -159,7 +161,8 @@
<sparkbundle.version>3.4</sparkbundle.version>
<sparkshim.artifactId>spark-sql-columnar-shims-spark34</sparkshim.artifactId>
<spark.version>3.4.2</spark.version>
- <iceberg.version>1.4.3</iceberg.version>
+ <iceberg.version>1.5.0</iceberg.version>
+ <delta.package.name>delta-core</delta.package.name>
<delta.version>2.4.0</delta.version>
<delta.binary.version>24</delta.binary.version>
</properties>
@@ -170,9 +173,11 @@
<sparkbundle.version>3.5</sparkbundle.version>
<sparkshim.artifactId>spark-sql-columnar-shims-spark35</sparkshim.artifactId>
<spark.version>3.5.1</spark.version>
- <iceberg.version>1.4.3</iceberg.version>
- <delta.version>2.4.0</delta.version>
- <delta.binary.version>24</delta.binary.version>
+ <iceberg.version>1.5.0</iceberg.version>
+ <delta.package.name>delta-spark</delta.package.name>
+ <delta.version>3.1.0</delta.version>
+ <delta.binary.version>31</delta.binary.version>
+ <fasterxml.version>2.15.1</fasterxml.version>
<hadoop.version>3.3.4</hadoop.version>
</properties>
</profile>
@@ -483,7 +488,7 @@
</dependency>
<dependency>
<groupId>io.delta</groupId>
- <artifactId>delta-core_${scala.binary.version}</artifactId>
+ <artifactId>${delta.package.name}_${scala.binary.version}</artifactId>
<version>${delta.version}</version>
<scope>provided</scope>
<exclusions>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]