This is an automated email from the ASF dual-hosted git repository.
yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new d12bf1fa3 [GLUTEN-6980][CORE] In shim poms, use Scala Maven compiler
configuration inherited from parent pom (#6972)
d12bf1fa3 is described below
commit d12bf1fa340761371bf68e5fef2f889b6ba434e3
Author: Hongze Zhang <[email protected]>
AuthorDate: Thu Aug 22 21:03:27 2024 +0800
[GLUTEN-6980][CORE] In shim poms, use Scala Maven compiler configuration
inherited from parent pom (#6972)
This could fix build errors in the IntelliJ IDEA IDE when the scala-2.13 profile and
spark-3.2/spark-3.3/spark-3.4 profiles are toggled on at the same time.
And with essential code cleanups.
---
.../apache/spark/shuffle/GlutenShuffleUtils.scala | 2 +-
.../parquet/GlutenParquetFilterSuite.scala | 1 -
.../parquet/GlutenParquetRowIndexSuite.scala | 1 +
pom.xml | 37 ++--
shims/common/pom.xml | 8 -
.../datasource/GlutenParquetWriterInjects.scala | 16 --
.../org/apache/gluten/sql/shims/SparkShims.scala | 3 +-
shims/spark32/pom.xml | 6 -
.../gluten/sql/shims/spark32/Spark32Shims.scala | 3 +-
.../main/scala/org/apache/spark/ShuffleUtils.scala | 3 +-
.../execution/ExpandOutputPartitioningShim.scala | 1 -
.../sql/execution/FileSourceScanExecShim.scala | 2 +-
.../sql/execution/datasources/WriteFiles.scala | 5 +-
.../execution/datasources/orc/OrcFileFormat.scala | 1 -
.../datasources/parquet/ParquetFileFormat.scala | 1 -
.../datasources/v2/utils/CatalogUtil.scala | 2 +-
shims/spark33/pom.xml | 198 ++++++++++----------
.../gluten/sql/shims/spark33/Spark33Shims.scala | 3 +-
.../main/scala/org/apache/spark/ShuffleUtils.scala | 3 +-
.../execution/ExpandOutputPartitioningShim.scala | 1 -
.../sql/execution/FileSourceScanExecShim.scala | 4 +-
.../sql/execution/datasources/WriteFiles.scala | 5 +-
.../execution/datasources/orc/OrcFileFormat.scala | 1 -
.../datasources/parquet/ParquetFileFormat.scala | 1 -
shims/spark34/pom.xml | 198 ++++++++++----------
.../gluten/sql/shims/spark34/Spark34Shims.scala | 3 +-
.../main/scala/org/apache/spark/ShuffleUtils.scala | 7 +-
.../catalyst/expressions/PromotePrecision.scala | 3 -
.../execution/ExpandOutputPartitioningShim.scala | 1 -
.../sql/execution/FileSourceScanExecShim.scala | 4 +-
.../datasources/v2/BatchScanExecShim.scala | 4 +-
shims/spark35/pom.xml | 200 ++++++++++-----------
.../gluten/sql/shims/spark35/Spark35Shims.scala | 3 +-
.../main/scala/org/apache/spark/ShuffleUtils.scala | 7 +-
.../catalyst/expressions/PromotePrecision.scala | 3 -
.../execution/ExpandOutputPartitioningShim.scala | 1 -
.../sql/execution/FileSourceScanExecShim.scala | 6 +-
.../datasources/v2/BatchScanExecShim.scala | 4 +-
38 files changed, 343 insertions(+), 409 deletions(-)
diff --git
a/gluten-core/src/main/scala/org/apache/spark/shuffle/GlutenShuffleUtils.scala
b/gluten-core/src/main/scala/org/apache/spark/shuffle/GlutenShuffleUtils.scala
index 341454470..d55060872 100644
---
a/gluten-core/src/main/scala/org/apache/spark/shuffle/GlutenShuffleUtils.scala
+++
b/gluten-core/src/main/scala/org/apache/spark/shuffle/GlutenShuffleUtils.scala
@@ -97,7 +97,7 @@ object GlutenShuffleUtils {
endMapIndex: Int,
startPartition: Int,
endPartition: Int
- ): Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long, Int)])],
Boolean] = {
+ ): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long, Int)])], Boolean] = {
SparkShimLoader.getSparkShims.getShuffleReaderParam(
handle,
startMapIndex,
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFilterSuite.scala
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFilterSuite.scala
index 471d88f36..ec326088a 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFilterSuite.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetFilterSuite.scala
@@ -37,7 +37,6 @@ import org.apache.spark.util.Utils
import org.apache.hadoop.fs.Path
import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate,
Operators}
import org.apache.parquet.filter2.predicate.FilterApi._
-import org.apache.parquet.filter2.predicate.Operators
import org.apache.parquet.filter2.predicate.Operators.{Column => _, Eq, Gt,
GtEq, Lt, LtEq, NotEq}
import org.apache.parquet.hadoop.{ParquetFileReader, ParquetInputFormat,
ParquetOutputFormat}
import org.apache.parquet.hadoop.util.HadoopInputFile
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala
index 0113a9228..7a75977bf 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/GlutenParquetRowIndexSuite.scala
@@ -49,6 +49,7 @@ class GlutenParquetRowIndexSuite extends ParquetRowIndexSuite
with GlutenSQLTest
.getBlocks
.asScala
.map(_.getRowCount)
+ .toSeq
}
private def readRowGroupRowCounts(dir: File): Seq[Seq[Long]] = {
diff --git a/pom.xml b/pom.xml
index 4fb327f9f..e95300744 100644
--- a/pom.xml
+++ b/pom.xml
@@ -24,8 +24,8 @@
<description>Apache Gluten(incubating)</description>
<organization>
- <name>Apache</name>
- <url>http://gluten.apache.org</url>
+ <name>Apache</name>
+ <url>http://gluten.apache.org</url>
</organization>
<scm>
@@ -171,6 +171,7 @@
<version>0.1.2</version>
</dependency>
</dependencies> -->
+ <recompileMode>${scala.recompile.mode}</recompileMode>
<args>
<arg>-unchecked</arg>
<arg>-deprecation</arg>
@@ -215,9 +216,24 @@
<arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
-->
</args>
- <compilerPlugins combine.self="override">
- </compilerPlugins>
</configuration>
+ <executions>
+ <execution>
+ <id>scala-compile-first</id>
+ <phase>process-resources</phase>
+ <goals>
+ <goal>add-source</goal>
+ <goal>compile</goal>
+ </goals>
+ </execution>
+ <execution>
+ <id>scala-test-compile-first</id>
+ <phase>process-test-resources</phase>
+ <goals>
+ <goal>testCompile</goal>
+ </goals>
+ </execution>
+ </executions>
</plugin>
</plugins>
</pluginManagement>
@@ -471,7 +487,7 @@
<appendAssemblyId>true</appendAssemblyId>
<descriptors>
<descriptor>
- src/assembly/source-assembly.xml
+ src/assembly/source-assembly.xml
</descriptor>
</descriptors>
<finalName>apache-gluten-${project.version}</finalName>
@@ -479,7 +495,7 @@
</execution>
</executions>
</plugin>
- </plugins>
+ </plugins>
</build>
</profile>
</profiles>
@@ -768,7 +784,7 @@
</goals>
<configuration>
<classifier>${scala.binary.version}</classifier>
- <classifier>${sparkbundle.version}</classifier>
+ <classifier>${sparkbundle.version}</classifier>
</configuration>
</execution>
</executions>
@@ -828,6 +844,7 @@
<arg>-Ywarn-unused:imports</arg>
<arg>-deprecation</arg>
<arg>-feature</arg>
+ <arg>-Wconf:cat=deprecation:wv,any:e</arg>
<arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
</args>
</configuration>
@@ -945,7 +962,7 @@
<version>${spotless.version}</version>
<configuration>
<java>
- <toggleOffOn />
+ <toggleOffOn/>
<googleJavaFormat>
<version>1.7</version>
</googleJavaFormat>
@@ -955,7 +972,7 @@
<order>org.apache.gluten,io.substrait.spark,,javax,java,scala,\#</order>
</importOrder>
- <removeUnusedImports />
+ <removeUnusedImports/>
<licenseHeader>
<content>${spotless.license.header}</content>
<delimiter>${spotless.delimiter}</delimiter>
@@ -963,7 +980,7 @@
</java>
<scala>
<!-- make it works `// spotless:off ` -->
- <toggleOffOn />
+ <toggleOffOn/>
<scalafmt>
<version>${spotless.scalafmt.version}</version>
<scalaMajorVersion>${scala.binary.version}</scalaMajorVersion>
diff --git a/shims/common/pom.xml b/shims/common/pom.xml
index 86ee768c3..92cee60b0 100644
--- a/shims/common/pom.xml
+++ b/shims/common/pom.xml
@@ -58,14 +58,6 @@
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
- <configuration>
- <args>
- <arg>-Wconf:cat=deprecation:silent</arg>
- <!--
-
<arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
- -->
- </args>
- </configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
diff --git
a/shims/common/src/main/scala/org/apache/gluten/execution/datasource/GlutenParquetWriterInjects.scala
b/shims/common/src/main/scala/org/apache/gluten/execution/datasource/GlutenParquetWriterInjects.scala
index 8144a80b5..ffbec6d89 100644
---
a/shims/common/src/main/scala/org/apache/gluten/execution/datasource/GlutenParquetWriterInjects.scala
+++
b/shims/common/src/main/scala/org/apache/gluten/execution/datasource/GlutenParquetWriterInjects.scala
@@ -16,22 +16,6 @@
*/
package org.apache.gluten.execution.datasource
-import org.apache.gluten.GlutenConfig
-
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.datasources.{BlockStripes, FakeRow,
OutputWriter}
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types.StructType
-
-import org.apache.hadoop.fs.FileStatus
-import org.apache.hadoop.mapreduce.TaskAttemptContext
-
-import scala.collection.JavaConverters.mapAsJavaMapConverter
-import scala.collection.mutable
-
object GlutenParquetWriterInjects {
private var INSTANCE: GlutenFormatWriterInjects = _
diff --git
a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
index 5a0946988..fa6ed18e9 100644
--- a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala
@@ -179,8 +179,7 @@ trait SparkShims {
startMapIndex: Int,
endMapIndex: Int,
startPartition: Int,
- endPartition: Int)
- : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long,
Int)])], Boolean]
+ endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long,
Int)])], Boolean]
// Compatible with Spark-3.5 and later
def getShuffleAdvisoryPartitionSize(shuffle: ShuffleExchangeLike):
Option[Long] = None
diff --git a/shims/spark32/pom.xml b/shims/spark32/pom.xml
index 7e9dcb226..9c6322ac5 100644
--- a/shims/spark32/pom.xml
+++ b/shims/spark32/pom.xml
@@ -105,12 +105,6 @@
<plugin>
<groupId>net.alchim31.maven</groupId>
<artifactId>scala-maven-plugin</artifactId>
- <configuration>
- <args>
- <arg>-Wconf:cat=deprecation:silent</arg>
-
<arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
- </args>
- </configuration>
</plugin>
<plugin>
<groupId>org.scalatest</groupId>
diff --git
a/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala
b/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala
index 995d5b087..f62f9031c 100644
---
a/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala
+++
b/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala
@@ -181,8 +181,7 @@ class Spark32Shims extends SparkShims {
startMapIndex: Int,
endMapIndex: Int,
startPartition: Int,
- endPartition: Int)
- : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long,
Int)])], Boolean] = {
+ endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long,
Int)])], Boolean] = {
ShuffleUtils.getReaderParam(handle, startMapIndex, endMapIndex,
startPartition, endPartition)
}
diff --git a/shims/spark32/src/main/scala/org/apache/spark/ShuffleUtils.scala
b/shims/spark32/src/main/scala/org/apache/spark/ShuffleUtils.scala
index fc99cc8d1..2cff1a6f5 100644
--- a/shims/spark32/src/main/scala/org/apache/spark/ShuffleUtils.scala
+++ b/shims/spark32/src/main/scala/org/apache/spark/ShuffleUtils.scala
@@ -25,8 +25,7 @@ object ShuffleUtils {
startMapIndex: Int,
endMapIndex: Int,
startPartition: Int,
- endPartition: Int)
- : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long,
Int)])], Boolean] = {
+ endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long,
Int)])], Boolean] = {
val address = SparkEnv.get.mapOutputTracker.getMapSizesByExecutorId(
handle.shuffleId,
startMapIndex,
diff --git
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
index b86d8f6ba..7dbad48da 100644
---
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
+++
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
@@ -17,7 +17,6 @@
package org.apache.spark.sql.execution
import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.{InnerLike, JoinType}
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning,
Partitioning, PartitioningCollection}
import scala.collection.mutable
diff --git
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
index 6378af3ba..77b3b4809 100644
---
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
+++
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
@@ -20,7 +20,7 @@ import org.apache.gluten.metrics.GlutenTimeMetric
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
-import org.apache.spark.sql.catalyst.expressions.{And, Attribute,
AttributeReference, BoundReference, DynamicPruningExpression, Expression,
PlanExpression, Predicate}
+import org.apache.spark.sql.catalyst.expressions.{And, Attribute,
AttributeReference, BoundReference, Expression, PlanExpression, Predicate}
import org.apache.spark.sql.execution.datasources.{HadoopFsRelation,
PartitionDirectory}
import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
import org.apache.spark.sql.types.StructType
diff --git
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala
index 11d4b7875..356a16942 100644
---
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala
+++
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala
@@ -16,8 +16,7 @@
*/
package org.apache.spark.sql.execution.datasources
-import org.apache.spark.{SparkException, TaskContext}
-import org.apache.spark.internal.io.{FileCommitProtocol,
SparkHadoopWriterUtils}
+import org.apache.spark.internal.io.FileCommitProtocol
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.catalog.BucketSpec
@@ -28,8 +27,6 @@ import
org.apache.spark.sql.connector.write.WriterCommitMessage
import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
import
org.apache.spark.sql.execution.datasources.FileFormatWriter.ConcurrentOutputWriterSpec
-import java.util.Date
-
/**
* This class is copied from Spark 3.4 and modified for Gluten. Spark 3.4
introduced a new operator,
* WriteFiles. In order to support the WriteTransformer in Spark 3.4, we need
to copy the WriteFiles
diff --git
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
index 940c47f1a..8ed7614ae 100644
---
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
+++
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
@@ -16,7 +16,6 @@
*/
package org.apache.spark.sql.execution.datasources.orc
-import org.apache.gluten.GlutenConfig
import org.apache.gluten.execution.datasource.GlutenOrcWriterInjects
import org.apache.spark.TaskContext
diff --git
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index fb03fb5f4..145c36e46 100644
---
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -16,7 +16,6 @@
*/
package org.apache.spark.sql.execution.datasources.parquet
-import org.apache.gluten.GlutenConfig
import org.apache.gluten.execution.datasource.GlutenParquetWriterInjects
import org.apache.spark.TaskContext
diff --git
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/v2/utils/CatalogUtil.scala
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/v2/utils/CatalogUtil.scala
index a5b951c23..810fb2a05 100644
---
a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/v2/utils/CatalogUtil.scala
+++
b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/datasources/v2/utils/CatalogUtil.scala
@@ -39,6 +39,6 @@ object CatalogUtil {
throw new UnsupportedOperationException(s"Partitioning by expressions")
}
- (identityCols, bucketSpec)
+ (identityCols.toSeq, bucketSpec)
}
}
diff --git a/shims/spark33/pom.xml b/shims/spark33/pom.xml
index e17a639ba..edab2c870 100644
--- a/shims/spark33/pom.xml
+++ b/shims/spark33/pom.xml
@@ -15,112 +15,106 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
+ <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.gluten</groupId>
- <artifactId>spark-sql-columnar-shims</artifactId>
- <version>1.3.0-SNAPSHOT</version>
- <relativePath>../pom.xml</relativePath>
- </parent>
+ <parent>
+ <groupId>org.apache.gluten</groupId>
+ <artifactId>spark-sql-columnar-shims</artifactId>
+ <version>1.3.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
- <artifactId>spark-sql-columnar-shims-spark33</artifactId>
- <name>Gluten Shims for Spark 3.3</name>
- <packaging>jar</packaging>
+ <artifactId>spark-sql-columnar-shims-spark33</artifactId>
+ <name>Gluten Shims for Spark 3.3</name>
+ <packaging>jar</packaging>
- <dependencies>
- <dependency>
- <groupId>org.apache.gluten</groupId>
- <artifactId>${project.prefix}-shims-common</artifactId>
- <version>${project.version}</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-sql_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- <optional>true</optional>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- <optional>true</optional>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-core_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- <optional>true</optional>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
- <scope>provided</scope>
- </dependency>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.gluten</groupId>
+ <artifactId>${project.prefix}-shims-common</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>provided</scope>
+ </dependency>
- <!--test-->
- <dependency>
- <groupId>org.scalatest</groupId>
- <artifactId>scalatest_${scala.binary.version}</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-core_${scala.binary.version}</artifactId>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-sql_${scala.binary.version}</artifactId>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-hive_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- </dependency>
+ <!--test-->
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-hive_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ </dependency>
- </dependencies>
+ </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-checkstyle-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.scalastyle</groupId>
- <artifactId>scalastyle-maven-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>com.diffplug.spotless</groupId>
- <artifactId>spotless-maven-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>net.alchim31.maven</groupId>
- <artifactId>scala-maven-plugin</artifactId>
- <configuration>
- <args>
- <arg>-Wconf:cat=deprecation:silent</arg>
-
<arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
- </args>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.scalatest</groupId>
- <artifactId>scalatest-maven-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- </plugin>
- </plugins>
- </build>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.scalastyle</groupId>
+ <artifactId>scalastyle-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>com.diffplug.spotless</groupId>
+ <artifactId>spotless-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>net.alchim31.maven</groupId>
+ <artifactId>scala-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
</project>
diff --git
a/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala
b/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala
index 7b606ea97..168b88275 100644
---
a/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala
+++
b/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala
@@ -273,8 +273,7 @@ class Spark33Shims extends SparkShims {
startMapIndex: Int,
endMapIndex: Int,
startPartition: Int,
- endPartition: Int)
- : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long,
Int)])], Boolean] = {
+ endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long,
Int)])], Boolean] = {
ShuffleUtils.getReaderParam(handle, startMapIndex, endMapIndex,
startPartition, endPartition)
}
diff --git a/shims/spark33/src/main/scala/org/apache/spark/ShuffleUtils.scala
b/shims/spark33/src/main/scala/org/apache/spark/ShuffleUtils.scala
index a08f0310d..d2b58e67c 100644
--- a/shims/spark33/src/main/scala/org/apache/spark/ShuffleUtils.scala
+++ b/shims/spark33/src/main/scala/org/apache/spark/ShuffleUtils.scala
@@ -25,8 +25,7 @@ object ShuffleUtils {
startMapIndex: Int,
endMapIndex: Int,
startPartition: Int,
- endPartition: Int)
- : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long,
Int)])], Boolean] = {
+ endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long,
Int)])], Boolean] = {
val baseShuffleHandle = handle.asInstanceOf[BaseShuffleHandle[K, _, C]]
if (baseShuffleHandle.dependency.isShuffleMergeFinalizedMarked) {
val res =
SparkEnv.get.mapOutputTracker.getPushBasedShuffleMapSizesByExecutorId(
diff --git
a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
index b86d8f6ba..7dbad48da 100644
---
a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
+++
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
@@ -17,7 +17,6 @@
package org.apache.spark.sql.execution
import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.{InnerLike, JoinType}
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning,
Partitioning, PartitioningCollection}
import scala.collection.mutable
diff --git
a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
index 5d3f6275c..ffc8684f5 100644
---
a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
+++
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
@@ -20,8 +20,8 @@ import org.apache.gluten.metrics.GlutenTimeMetric
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
-import org.apache.spark.sql.catalyst.expressions.{And, Attribute,
AttributeReference, BoundReference, DynamicPruningExpression, Expression,
FileSourceMetadataAttribute, PlanExpression, Predicate}
-import org.apache.spark.sql.execution.datasources.{FileFormat,
HadoopFsRelation, PartitionDirectory}
+import org.apache.spark.sql.catalyst.expressions.{And, Attribute,
AttributeReference, BoundReference, Expression, FileSourceMetadataAttribute,
PlanExpression, Predicate}
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation,
PartitionDirectory}
import org.apache.spark.sql.execution.datasources.parquet.ParquetUtils
import org.apache.spark.sql.execution.metric.{SQLMetric, SQLMetrics}
import org.apache.spark.sql.types.StructType
diff --git
a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala
index 11d4b7875..356a16942 100644
---
a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala
+++
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/WriteFiles.scala
@@ -16,8 +16,7 @@
*/
package org.apache.spark.sql.execution.datasources
-import org.apache.spark.{SparkException, TaskContext}
-import org.apache.spark.internal.io.{FileCommitProtocol,
SparkHadoopWriterUtils}
+import org.apache.spark.internal.io.FileCommitProtocol
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.catalog.BucketSpec
@@ -28,8 +27,6 @@ import
org.apache.spark.sql.connector.write.WriterCommitMessage
import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
import
org.apache.spark.sql.execution.datasources.FileFormatWriter.ConcurrentOutputWriterSpec
-import java.util.Date
-
/**
* This class is copied from Spark 3.4 and modified for Gluten. Spark 3.4
introduced a new operator,
* WriteFiles. In order to support the WriteTransformer in Spark 3.4, we need
to copy the WriteFiles
diff --git
a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
index 08463ab61..979fe9faf 100644
---
a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
+++
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFileFormat.scala
@@ -16,7 +16,6 @@
*/
package org.apache.spark.sql.execution.datasources.orc
-import org.apache.gluten.GlutenConfig
import org.apache.gluten.execution.datasource.GlutenOrcWriterInjects
import org.apache.spark.TaskContext
diff --git
a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 8954f5823..7064f1a6f 100644
---
a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++
b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -16,7 +16,6 @@
*/
package org.apache.spark.sql.execution.datasources.parquet
-import org.apache.gluten.GlutenConfig
import org.apache.gluten.execution.datasource.GlutenParquetWriterInjects
import org.apache.spark.TaskContext
diff --git a/shims/spark34/pom.xml b/shims/spark34/pom.xml
index bd1de9fe8..dd4b8d156 100644
--- a/shims/spark34/pom.xml
+++ b/shims/spark34/pom.xml
@@ -15,112 +15,106 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
+ <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.gluten</groupId>
- <artifactId>spark-sql-columnar-shims</artifactId>
- <version>1.3.0-SNAPSHOT</version>
- <relativePath>../pom.xml</relativePath>
- </parent>
+ <parent>
+ <groupId>org.apache.gluten</groupId>
+ <artifactId>spark-sql-columnar-shims</artifactId>
+ <version>1.3.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
- <artifactId>spark-sql-columnar-shims-spark34</artifactId>
- <name>Gluten Shims for Spark 3.4</name>
- <packaging>jar</packaging>
+ <artifactId>spark-sql-columnar-shims-spark34</artifactId>
+ <name>Gluten Shims for Spark 3.4</name>
+ <packaging>jar</packaging>
- <dependencies>
- <dependency>
- <groupId>org.apache.gluten</groupId>
- <artifactId>${project.prefix}-shims-common</artifactId>
- <version>${project.version}</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-sql_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- <optional>true</optional>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- <optional>true</optional>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-core_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- <optional>true</optional>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
- <scope>provided</scope>
- </dependency>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.gluten</groupId>
+ <artifactId>${project.prefix}-shims-common</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>provided</scope>
+ </dependency>
- <!--test-->
- <dependency>
- <groupId>org.scalatest</groupId>
- <artifactId>scalatest_${scala.binary.version}</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-core_${scala.binary.version}</artifactId>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-sql_${scala.binary.version}</artifactId>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-hive_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- </dependency>
+ <!--test-->
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-hive_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ </dependency>
- </dependencies>
+ </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-checkstyle-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.scalastyle</groupId>
- <artifactId>scalastyle-maven-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>com.diffplug.spotless</groupId>
- <artifactId>spotless-maven-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>net.alchim31.maven</groupId>
- <artifactId>scala-maven-plugin</artifactId>
- <configuration>
- <args>
- <arg>-Wconf:cat=deprecation:silent</arg>
-
<arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
- </args>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.scalatest</groupId>
- <artifactId>scalatest-maven-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- </plugin>
- </plugins>
- </build>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.scalastyle</groupId>
+ <artifactId>scalastyle-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>com.diffplug.spotless</groupId>
+ <artifactId>spotless-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>net.alchim31.maven</groupId>
+ <artifactId>scala-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
</project>
diff --git
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
index 8b1b7649a..b277139e8 100644
---
a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
+++
b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala
@@ -328,8 +328,7 @@ class Spark34Shims extends SparkShims {
startMapIndex: Int,
endMapIndex: Int,
startPartition: Int,
- endPartition: Int)
- : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long,
Int)])], Boolean] = {
+ endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long,
Int)])], Boolean] = {
ShuffleUtils.getReaderParam(handle, startMapIndex, endMapIndex,
startPartition, endPartition)
}
diff --git a/shims/spark34/src/main/scala/org/apache/spark/ShuffleUtils.scala
b/shims/spark34/src/main/scala/org/apache/spark/ShuffleUtils.scala
index a08f0310d..c2a6cd5cf 100644
--- a/shims/spark34/src/main/scala/org/apache/spark/ShuffleUtils.scala
+++ b/shims/spark34/src/main/scala/org/apache/spark/ShuffleUtils.scala
@@ -25,8 +25,7 @@ object ShuffleUtils {
startMapIndex: Int,
endMapIndex: Int,
startPartition: Int,
- endPartition: Int)
- : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long,
Int)])], Boolean] = {
+ endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long,
Int)])], Boolean] = {
val baseShuffleHandle = handle.asInstanceOf[BaseShuffleHandle[K, _, C]]
if (baseShuffleHandle.dependency.isShuffleMergeFinalizedMarked) {
val res =
SparkEnv.get.mapOutputTracker.getPushBasedShuffleMapSizesByExecutorId(
@@ -35,7 +34,7 @@ object ShuffleUtils {
endMapIndex,
startPartition,
endPartition)
- (res.iter, res.enableBatchFetch)
+ (res.iter.map(b => (b._1, b._2.toSeq)), res.enableBatchFetch)
} else {
val address = SparkEnv.get.mapOutputTracker.getMapSizesByExecutorId(
handle.shuffleId,
@@ -43,7 +42,7 @@ object ShuffleUtils {
endMapIndex,
startPartition,
endPartition)
- (address, true)
+ (address.map(b => (b._1, b._2.toSeq)), true)
}
}
}
diff --git
a/shims/spark34/src/main/scala/org/apache/spark/sql/catalyst/expressions/PromotePrecision.scala
b/shims/spark34/src/main/scala/org/apache/spark/sql/catalyst/expressions/PromotePrecision.scala
index 8de5a07fe..b18a79b86 100644
---
a/shims/spark34/src/main/scala/org/apache/spark/sql/catalyst/expressions/PromotePrecision.scala
+++
b/shims/spark34/src/main/scala/org/apache/spark/sql/catalyst/expressions/PromotePrecision.scala
@@ -18,9 +18,6 @@ package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext,
ExprCode}
-import org.apache.spark.sql.catalyst.expressions.codegen.Block._
-import org.apache.spark.sql.errors.QueryExecutionErrors
-import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
case class PromotePrecision(child: Expression) extends UnaryExpression {
diff --git
a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
index 40fc16d2e..791490064 100644
---
a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
+++
b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
@@ -17,7 +17,6 @@
package org.apache.spark.sql.execution
import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.{InnerLike, JoinType}
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioningLike,
Partitioning, PartitioningCollection}
import scala.collection.mutable
diff --git
a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
index bc3893ca2..356438d3c 100644
---
a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
+++
b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
@@ -21,8 +21,8 @@ import org.apache.gluten.sql.shims.SparkShimLoader
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
-import org.apache.spark.sql.catalyst.expressions.{And, Attribute,
AttributeReference, BoundReference, Expression,
FileSourceConstantMetadataAttribute, FileSourceGeneratedMetadataAttribute,
FileSourceMetadataAttribute, PlanExpression, Predicate}
-import org.apache.spark.sql.execution.datasources.{FileFormat,
HadoopFsRelation, PartitionDirectory}
+import org.apache.spark.sql.catalyst.expressions.{And, Attribute,
AttributeReference, BoundReference, Expression,
FileSourceConstantMetadataAttribute, FileSourceGeneratedMetadataAttribute,
PlanExpression, Predicate}
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation,
PartitionDirectory}
import org.apache.spark.sql.execution.datasources.parquet.ParquetUtils
import org.apache.spark.sql.execution.metric.SQLMetric
import org.apache.spark.sql.types.StructType
diff --git
a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala
b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala
index bd3b09a01..5fdc3b04d 100644
---
a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala
+++
b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala
@@ -24,9 +24,7 @@ import
org.apache.spark.sql.catalyst.plans.physical.KeyGroupedPartitioning
import org.apache.spark.sql.catalyst.util.InternalRowComparableWrapper
import org.apache.spark.sql.connector.catalog.Table
import org.apache.spark.sql.connector.expressions.aggregate.Aggregation
-import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition,
Scan}
-import org.apache.spark.sql.connector.read.SupportsRuntimeV2Filtering
-import org.apache.spark.sql.execution.datasources.FileFormat
+import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition,
Scan, SupportsRuntimeV2Filtering}
import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan
import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
import org.apache.spark.sql.execution.metric.SQLMetric
diff --git a/shims/spark35/pom.xml b/shims/spark35/pom.xml
index 66fbb9100..ed1b59991 100644
--- a/shims/spark35/pom.xml
+++ b/shims/spark35/pom.xml
@@ -15,114 +15,106 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
- <modelVersion>4.0.0</modelVersion>
+ <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.gluten</groupId>
- <artifactId>spark-sql-columnar-shims</artifactId>
- <version>1.3.0-SNAPSHOT</version>
- <relativePath>../pom.xml</relativePath>
- </parent>
+ <parent>
+ <groupId>org.apache.gluten</groupId>
+ <artifactId>spark-sql-columnar-shims</artifactId>
+ <version>1.3.0-SNAPSHOT</version>
+ <relativePath>../pom.xml</relativePath>
+ </parent>
- <artifactId>spark-sql-columnar-shims-spark35</artifactId>
- <name>Gluten Shims for Spark 3.5</name>
- <packaging>jar</packaging>
+ <artifactId>spark-sql-columnar-shims-spark35</artifactId>
+ <name>Gluten Shims for Spark 3.5</name>
+ <packaging>jar</packaging>
- <dependencies>
- <dependency>
- <groupId>org.apache.gluten</groupId>
- <artifactId>${project.prefix}-shims-common</artifactId>
- <version>${project.version}</version>
- <scope>compile</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-sql_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- <optional>true</optional>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- <optional>true</optional>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-core_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- <optional>true</optional>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-common</artifactId>
- <version>${hadoop.version}</version>
- <scope>provided</scope>
- </dependency>
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.gluten</groupId>
+ <artifactId>${project.prefix}-shims-common</artifactId>
+ <version>${project.version}</version>
+ <scope>compile</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.hadoop</groupId>
+ <artifactId>hadoop-common</artifactId>
+ <version>${hadoop.version}</version>
+ <scope>provided</scope>
+ </dependency>
- <!--test-->
- <dependency>
- <groupId>org.scalatest</groupId>
- <artifactId>scalatest_${scala.binary.version}</artifactId>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-core_${scala.binary.version}</artifactId>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-sql_${scala.binary.version}</artifactId>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-hive_${scala.binary.version}</artifactId>
- <scope>provided</scope>
- </dependency>
+ <!--test-->
+ <dependency>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest_${scala.binary.version}</artifactId>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-core_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+ <type>test-jar</type>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-hive_${scala.binary.version}</artifactId>
+ <scope>provided</scope>
+ </dependency>
- </dependencies>
+ </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-checkstyle-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.scalastyle</groupId>
- <artifactId>scalastyle-maven-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>com.diffplug.spotless</groupId>
- <artifactId>spotless-maven-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>net.alchim31.maven</groupId>
- <artifactId>scala-maven-plugin</artifactId>
- <configuration>
- <args>
- <arg>-Wconf:cat=deprecation:silent</arg>
- <!--
-
<arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
- -->
- </args>
- </configuration>
- </plugin>
- <plugin>
- <groupId>org.scalatest</groupId>
- <artifactId>scalatest-maven-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- </plugin>
- </plugins>
- </build>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.scalastyle</groupId>
+ <artifactId>scalastyle-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>com.diffplug.spotless</groupId>
+ <artifactId>spotless-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>net.alchim31.maven</groupId>
+ <artifactId>scala-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.scalatest</groupId>
+ <artifactId>scalatest-maven-plugin</artifactId>
+ </plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-compiler-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
</project>
diff --git
a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
index 93785d7a2..6474c74fe 100644
---
a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
+++
b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala
@@ -353,8 +353,7 @@ class Spark35Shims extends SparkShims {
startMapIndex: Int,
endMapIndex: Int,
startPartition: Int,
- endPartition: Int)
- : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long,
Int)])], Boolean] = {
+ endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long,
Int)])], Boolean] = {
ShuffleUtils.getReaderParam(handle, startMapIndex, endMapIndex,
startPartition, endPartition)
}
diff --git a/shims/spark35/src/main/scala/org/apache/spark/ShuffleUtils.scala
b/shims/spark35/src/main/scala/org/apache/spark/ShuffleUtils.scala
index a08f0310d..c2a6cd5cf 100644
--- a/shims/spark35/src/main/scala/org/apache/spark/ShuffleUtils.scala
+++ b/shims/spark35/src/main/scala/org/apache/spark/ShuffleUtils.scala
@@ -25,8 +25,7 @@ object ShuffleUtils {
startMapIndex: Int,
endMapIndex: Int,
startPartition: Int,
- endPartition: Int)
- : Tuple2[Iterator[(BlockManagerId, collection.Seq[(BlockId, Long,
Int)])], Boolean] = {
+ endPartition: Int): Tuple2[Iterator[(BlockManagerId, Seq[(BlockId, Long,
Int)])], Boolean] = {
val baseShuffleHandle = handle.asInstanceOf[BaseShuffleHandle[K, _, C]]
if (baseShuffleHandle.dependency.isShuffleMergeFinalizedMarked) {
val res =
SparkEnv.get.mapOutputTracker.getPushBasedShuffleMapSizesByExecutorId(
@@ -35,7 +34,7 @@ object ShuffleUtils {
endMapIndex,
startPartition,
endPartition)
- (res.iter, res.enableBatchFetch)
+ (res.iter.map(b => (b._1, b._2.toSeq)), res.enableBatchFetch)
} else {
val address = SparkEnv.get.mapOutputTracker.getMapSizesByExecutorId(
handle.shuffleId,
@@ -43,7 +42,7 @@ object ShuffleUtils {
endMapIndex,
startPartition,
endPartition)
- (address, true)
+ (address.map(b => (b._1, b._2.toSeq)), true)
}
}
}
diff --git
a/shims/spark35/src/main/scala/org/apache/spark/sql/catalyst/expressions/PromotePrecision.scala
b/shims/spark35/src/main/scala/org/apache/spark/sql/catalyst/expressions/PromotePrecision.scala
index 8de5a07fe..b18a79b86 100644
---
a/shims/spark35/src/main/scala/org/apache/spark/sql/catalyst/expressions/PromotePrecision.scala
+++
b/shims/spark35/src/main/scala/org/apache/spark/sql/catalyst/expressions/PromotePrecision.scala
@@ -18,9 +18,6 @@ package org.apache.spark.sql.catalyst.expressions
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext,
ExprCode}
-import org.apache.spark.sql.catalyst.expressions.codegen.Block._
-import org.apache.spark.sql.errors.QueryExecutionErrors
-import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
case class PromotePrecision(child: Expression) extends UnaryExpression {
diff --git
a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
index 40fc16d2e..791490064 100644
---
a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
+++
b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/ExpandOutputPartitioningShim.scala
@@ -17,7 +17,6 @@
package org.apache.spark.sql.execution
import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.{InnerLike, JoinType}
import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioningLike,
Partitioning, PartitioningCollection}
import scala.collection.mutable
diff --git
a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
index c8795e31c..5ec4499ec 100644
---
a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
+++
b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/FileSourceScanExecShim.scala
@@ -21,9 +21,9 @@ import org.apache.gluten.sql.shims.SparkShimLoader
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.{InternalRow, TableIdentifier}
-import org.apache.spark.sql.catalyst.expressions.{And, Attribute,
AttributeReference, BoundReference, DynamicPruningExpression, Expression,
FileSourceConstantMetadataAttribute, FileSourceGeneratedMetadataAttribute,
FileSourceMetadataAttribute, PlanExpression, Predicate}
-import org.apache.spark.sql.execution.datasources.{FileFormat,
HadoopFsRelation, PartitionDirectory}
-import org.apache.spark.sql.execution.datasources.parquet.{ParquetFileFormat,
ParquetUtils}
+import org.apache.spark.sql.catalyst.expressions.{And, Attribute,
AttributeReference, BoundReference, Expression,
FileSourceConstantMetadataAttribute, FileSourceGeneratedMetadataAttribute,
PlanExpression, Predicate}
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation,
PartitionDirectory}
+import org.apache.spark.sql.execution.datasources.parquet.ParquetUtils
import org.apache.spark.sql.execution.metric.SQLMetric
import org.apache.spark.sql.types.StructType
import org.apache.spark.util.collection.BitSet
diff --git
a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala
b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala
index 343070d7f..fb3078d39 100644
---
a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala
+++
b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/datasources/v2/BatchScanExecShim.scala
@@ -24,9 +24,7 @@ import
org.apache.spark.sql.catalyst.plans.physical.KeyGroupedPartitioning
import org.apache.spark.sql.catalyst.util.InternalRowComparableWrapper
import org.apache.spark.sql.connector.catalog.Table
import org.apache.spark.sql.connector.expressions.aggregate.Aggregation
-import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition,
Scan}
-import org.apache.spark.sql.connector.read.SupportsRuntimeV2Filtering
-import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.connector.read.{HasPartitionKey, InputPartition,
Scan, SupportsRuntimeV2Filtering}
import org.apache.spark.sql.execution.datasources.v2.orc.OrcScan
import org.apache.spark.sql.execution.datasources.v2.parquet.ParquetScan
import org.apache.spark.sql.execution.metric.SQLMetric
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]