This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new ecf61ee2f [CORE] Add a compilation-time check to forbid case-class
inheritance (#5729)
ecf61ee2f is described below
commit ecf61ee2f4bb8fefb4a48635c293cb5d7fe18fa1
Author: Hongze Zhang <[email protected]>
AuthorDate: Tue May 14 13:05:50 2024 +0800
[CORE] Add a compilation-time check to forbid case-class inheritance (#5729)
---
.../org/apache/spark/sql/delta/DeltaLog.scala | 54 +++++++++++++++-------
.../org/apache/spark/sql/delta/DeltaLog.scala | 52 ++++++++++++++-------
.../spark/shuffle/HashPartitioningWrapper.scala | 1 +
.../sql/delta/catalog/ClickHouseTableV2.scala | 2 +
.../v2/clickhouse/metadata/AddFileTags.scala | 1 +
pom.xml | 18 +++++++-
6 files changed, 93 insertions(+), 35 deletions(-)
diff --git
a/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/DeltaLog.scala
b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/DeltaLog.scala
index 00820a006..0f6455997 100644
---
a/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/DeltaLog.scala
+++
b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/DeltaLog.scala
@@ -81,9 +81,10 @@ class DeltaLog private (
with SnapshotManagement
with DeltaFileFormat
with ReadChecksum {
-
import org.apache.spark.sql.delta.util.FileNames._
+ import DeltaLog._
+
implicit private lazy val _clock = clock
protected def spark = SparkSession.active
@@ -442,8 +443,8 @@ class DeltaLog private (
val fileIndex =
TahoeLogFileIndex(spark, this, dataPath, snapshotToUse,
partitionFilters, isTimeTravelQuery)
- var bucketSpec: Option[BucketSpec] = ClickHouseTableV2.getTable(this).bucketOption
- new HadoopFsRelation(
+ val bucketSpec: Option[BucketSpec] = ClickHouseTableV2.getTable(this).bucketOption
+ new DeltaHadoopFsRelation(
fileIndex,
partitionSchema =
DeltaColumnMapping.dropColumnMappingMetadata(snapshotToUse.metadata.partitionSchema),
@@ -460,20 +461,9 @@ class DeltaLog private (
// conflict with `DeltaLog.options`.
snapshotToUse.metadata.format.options ++ options
)(
- spark
- ) with InsertableRelation {
- def insert(data: DataFrame, overwrite: Boolean): Unit = {
- val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append
- WriteIntoDelta(
- deltaLog = DeltaLog.this,
- mode = mode,
- new DeltaOptions(Map.empty[String, String], spark.sessionState.conf),
- partitionColumns = Seq.empty,
- configuration = Map.empty,
- data = data
- ).run(spark)
- }
- }
+ spark,
+ this
+ )
}
override def fileFormat(metadata: Metadata = metadata): FileFormat =
@@ -482,6 +472,36 @@ class DeltaLog private (
}
object DeltaLog extends DeltaLogging {
+
@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
+ private class DeltaHadoopFsRelation(
+ location: FileIndex,
+ partitionSchema: StructType,
+ // The top-level columns in `dataSchema` should match the actual physical file schema, otherwise
+ // the ORC data source may not work with the by-ordinal mode.
+ dataSchema: StructType,
+ bucketSpec: Option[BucketSpec],
+ fileFormat: FileFormat,
+ options: Map[String, String])(sparkSession: SparkSession, deltaLog: DeltaLog)
+ extends HadoopFsRelation(
+ location,
+ partitionSchema,
+ dataSchema,
+ bucketSpec,
+ fileFormat,
+ options)(sparkSession)
+ with InsertableRelation {
+ def insert(data: DataFrame, overwrite: Boolean): Unit = {
+ val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append
+ WriteIntoDelta(
+ deltaLog = deltaLog,
+ mode = mode,
+ new DeltaOptions(Map.empty[String, String], sparkSession.sessionState.conf),
+ partitionColumns = Seq.empty,
+ configuration = Map.empty,
+ data = data
+ ).run(sparkSession)
+ }
+ }
/**
* The key type of `DeltaLog` cache. It's a pair of the canonicalized table path and the file
diff --git
a/backends-clickhouse/src/main/delta-22/org/apache/spark/sql/delta/DeltaLog.scala
b/backends-clickhouse/src/main/delta-22/org/apache/spark/sql/delta/DeltaLog.scala
index bbf0bdc91..4cab6454d 100644
---
a/backends-clickhouse/src/main/delta-22/org/apache/spark/sql/delta/DeltaLog.scala
+++
b/backends-clickhouse/src/main/delta-22/org/apache/spark/sql/delta/DeltaLog.scala
@@ -85,9 +85,10 @@ class DeltaLog private (
with SnapshotManagement
with DeltaFileFormat
with ReadChecksum {
-
import org.apache.spark.sql.delta.util.FileNames._
+ import DeltaLog._
+
implicit private lazy val _clock = clock
protected def spark = SparkSession.active
@@ -483,7 +484,7 @@ class DeltaLog private (
val fileIndex =
TahoeLogFileIndex(spark, this, dataPath, snapshotToUse,
partitionFilters, isTimeTravelQuery)
var bucketSpec: Option[BucketSpec] = ClickHouseTableV2.getTable(this).bucketOption
- new HadoopFsRelation(
+ new DeltaHadoopFsRelation(
fileIndex,
partitionSchema =
DeltaColumnMapping.dropColumnMappingMetadata(snapshotToUse.metadata.partitionSchema),
@@ -500,20 +501,9 @@ class DeltaLog private (
// conflict with `DeltaLog.options`.
snapshotToUse.metadata.format.options ++ options
)(
- spark
- ) with InsertableRelation {
- def insert(data: DataFrame, overwrite: Boolean): Unit = {
- val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append
- WriteIntoDelta(
- deltaLog = DeltaLog.this,
- mode = mode,
- new DeltaOptions(Map.empty[String, String], spark.sessionState.conf),
- partitionColumns = Seq.empty,
- configuration = Map.empty,
- data = data
- ).run(spark)
- }
- }
+ spark,
+ this
+ )
}
/**
@@ -566,6 +556,36 @@ class DeltaLog private (
}
object DeltaLog extends DeltaLogging {
+
@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
+ private class DeltaHadoopFsRelation(
+ location: FileIndex,
+ partitionSchema: StructType,
+ // The top-level columns in `dataSchema` should match the actual physical file schema, otherwise
+ // the ORC data source may not work with the by-ordinal mode.
+ dataSchema: StructType,
+ bucketSpec: Option[BucketSpec],
+ fileFormat: FileFormat,
+ options: Map[String, String])(sparkSession: SparkSession, deltaLog: DeltaLog)
+ extends HadoopFsRelation(
+ location,
+ partitionSchema,
+ dataSchema,
+ bucketSpec,
+ fileFormat,
+ options)(sparkSession)
+ with InsertableRelation {
+ def insert(data: DataFrame, overwrite: Boolean): Unit = {
+ val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append
+ WriteIntoDelta(
+ deltaLog = deltaLog,
+ mode = mode,
+ new DeltaOptions(Map.empty[String, String], sparkSession.sessionState.conf),
+ partitionColumns = Seq.empty,
+ configuration = Map.empty,
+ data = data
+ ).run(sparkSession)
+ }
+ }
/**
* The key type of `DeltaLog` cache. It's a pair of the canonicalized table path and the file
diff --git
a/gluten-core/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
b/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
similarity index 94%
rename from
gluten-core/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
rename to
backends-clickhouse/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
index 06ce8fe0f..bf1cbe4a8 100644
---
a/gluten-core/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
@@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning
// A wrapper for HashPartitioning to remain original hash expressions.
// Only used by CH backend when shuffle hash expressions contains non-field expression.
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
class HashPartitioningWrapper(
original: Seq[Expression],
newExpr: Seq[Expression],
diff --git
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
index 8c09ae7c7..1107c6a2e 100644
---
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
@@ -39,6 +39,7 @@ import java.{util => ju}
import scala.collection.JavaConverters._
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
class ClickHouseTableV2(
override val spark: SparkSession,
override val path: Path,
@@ -268,6 +269,7 @@ class ClickHouseTableV2(
}
}
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
class TempClickHouseTableV2(
override val spark: SparkSession,
override val catalogTable: Option[CatalogTable] = None)
diff --git
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/metadata/AddFileTags.scala
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/metadata/AddFileTags.scala
index bdb3a30e9..0680663eb 100644
---
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/metadata/AddFileTags.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/metadata/AddFileTags.scala
@@ -28,6 +28,7 @@ import java.util.{List => JList}
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
class AddMergeTreeParts(
val database: String,
val table: String,
diff --git a/pom.xml b/pom.xml
index dbf46ac17..0f37bcbf1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -615,13 +615,27 @@
<artifactId>scala-maven-plugin</artifactId>
<version>${scala.compiler.version}</version>
<configuration>
+ <compilerPlugins>
+ <compilerPlugin>
+ <groupId>org.wartremover</groupId>
+ <artifactId>wartremover_${scala.binary.version}</artifactId>
+ <version>3.1.6</version>
+ </compilerPlugin>
+ </compilerPlugins>
+ <dependencies>
+ <dependency>
+ <groupId>io.github.zhztheplayer.scalawarts</groupId>
+ <artifactId>scalawarts</artifactId>
+ <version>0.1.1</version>
+ </dependency>
+ </dependencies>
<recompileMode>${scala.recompile.mode}</recompileMode>
<args>
- <arg>-Wconf:msg=While parsing annotations in:silent</arg>
+ <arg>-Wconf:msg=While parsing annotations in:silent,any:e</arg>
<arg>-Ywarn-unused:imports</arg>
- <arg>-Xfatal-warnings</arg>
<arg>-deprecation</arg>
<arg>-feature</arg>
+ <arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
</args>
</configuration>
<executions>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]