This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new ecf61ee2f [CORE] Add a compilation-time check to forbid case-class
inheritance (#5729)
ecf61ee2f is described below
commit ecf61ee2f4bb8fefb4a48635c293cb5d7fe18fa1
Author: Hongze Zhang <[email protected]>
AuthorDate: Tue May 14 13:05:50 2024 +0800
[CORE] Add a compilation-time check to forbid case-class inheritance (#5729)
---
.../org/apache/spark/sql/delta/DeltaLog.scala | 54 +++++++++++++++-------
.../org/apache/spark/sql/delta/DeltaLog.scala | 52 ++++++++++++++-------
.../spark/shuffle/HashPartitioningWrapper.scala | 1 +
.../sql/delta/catalog/ClickHouseTableV2.scala | 2 +
.../v2/clickhouse/metadata/AddFileTags.scala | 1 +
pom.xml | 18 +++++++-
6 files changed, 93 insertions(+), 35 deletions(-)
diff --git
a/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/DeltaLog.scala
b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/DeltaLog.scala
index 00820a006..0f6455997 100644
---
a/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/DeltaLog.scala
+++
b/backends-clickhouse/src/main/delta-20/org/apache/spark/sql/delta/DeltaLog.scala
@@ -81,9 +81,10 @@ class DeltaLog private (
with SnapshotManagement
with DeltaFileFormat
with ReadChecksum {
-
import org.apache.spark.sql.delta.util.FileNames._
+ import DeltaLog._
+
implicit private lazy val _clock = clock
protected def spark = SparkSession.active
@@ -442,8 +443,8 @@ class DeltaLog private (
val fileIndex =
TahoeLogFileIndex(spark, this, dataPath, snapshotToUse,
partitionFilters, isTimeTravelQuery)
- var bucketSpec: Option[BucketSpec] = ClickHouseTableV2.getTable(this).bucketOption
- new HadoopFsRelation(
+ val bucketSpec: Option[BucketSpec] = ClickHouseTableV2.getTable(this).bucketOption
+ new DeltaHadoopFsRelation(
fileIndex,
partitionSchema =
DeltaColumnMapping.dropColumnMappingMetadata(snapshotToUse.metadata.partitionSchema),
@@ -460,20 +461,9 @@ class DeltaLog private (
// conflict with `DeltaLog.options`.
snapshotToUse.metadata.format.options ++ options
)(
- spark
- ) with InsertableRelation {
- def insert(data: DataFrame, overwrite: Boolean): Unit = {
- val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append
- WriteIntoDelta(
- deltaLog = DeltaLog.this,
- mode = mode,
- new DeltaOptions(Map.empty[String, String], spark.sessionState.conf),
- partitionColumns = Seq.empty,
- configuration = Map.empty,
- data = data
- ).run(spark)
- }
- }
+ spark,
+ this
+ )
}
override def fileFormat(metadata: Metadata = metadata): FileFormat =
@@ -482,6 +472,36 @@ class DeltaLog private (
}
object DeltaLog extends DeltaLogging {
+
@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
+ private class DeltaHadoopFsRelation(
+ location: FileIndex,
+ partitionSchema: StructType,
+ // The top-level columns in `dataSchema` should match the actual physical file schema, otherwise
+ // the ORC data source may not work with the by-ordinal mode.
+ dataSchema: StructType,
+ bucketSpec: Option[BucketSpec],
+ fileFormat: FileFormat,
+ options: Map[String, String])(sparkSession: SparkSession, deltaLog: DeltaLog)
+ extends HadoopFsRelation(
+ location,
+ partitionSchema,
+ dataSchema,
+ bucketSpec,
+ fileFormat,
+ options)(sparkSession)
+ with InsertableRelation {
+ def insert(data: DataFrame, overwrite: Boolean): Unit = {
+ val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append
+ WriteIntoDelta(
+ deltaLog = deltaLog,
+ mode = mode,
+ new DeltaOptions(Map.empty[String, String], sparkSession.sessionState.conf),
+ partitionColumns = Seq.empty,
+ configuration = Map.empty,
+ data = data
+ ).run(sparkSession)
+ }
+ }
/**
* The key type of `DeltaLog` cache. It's a pair of the canonicalized table path and the file
diff --git
a/backends-clickhouse/src/main/delta-22/org/apache/spark/sql/delta/DeltaLog.scala
b/backends-clickhouse/src/main/delta-22/org/apache/spark/sql/delta/DeltaLog.scala
index bbf0bdc91..4cab6454d 100644
---
a/backends-clickhouse/src/main/delta-22/org/apache/spark/sql/delta/DeltaLog.scala
+++
b/backends-clickhouse/src/main/delta-22/org/apache/spark/sql/delta/DeltaLog.scala
@@ -85,9 +85,10 @@ class DeltaLog private (
with SnapshotManagement
with DeltaFileFormat
with ReadChecksum {
-
import org.apache.spark.sql.delta.util.FileNames._
+ import DeltaLog._
+
implicit private lazy val _clock = clock
protected def spark = SparkSession.active
@@ -483,7 +484,7 @@ class DeltaLog private (
val fileIndex =
TahoeLogFileIndex(spark, this, dataPath, snapshotToUse,
partitionFilters, isTimeTravelQuery)
var bucketSpec: Option[BucketSpec] = ClickHouseTableV2.getTable(this).bucketOption
- new HadoopFsRelation(
+ new DeltaHadoopFsRelation(
fileIndex,
partitionSchema =
DeltaColumnMapping.dropColumnMappingMetadata(snapshotToUse.metadata.partitionSchema),
@@ -500,20 +501,9 @@ class DeltaLog private (
// conflict with `DeltaLog.options`.
snapshotToUse.metadata.format.options ++ options
)(
- spark
- ) with InsertableRelation {
- def insert(data: DataFrame, overwrite: Boolean): Unit = {
- val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append
- WriteIntoDelta(
- deltaLog = DeltaLog.this,
- mode = mode,
- new DeltaOptions(Map.empty[String, String], spark.sessionState.conf),
- partitionColumns = Seq.empty,
- configuration = Map.empty,
- data = data
- ).run(spark)
- }
- }
+ spark,
+ this
+ )
}
/**
@@ -566,6 +556,36 @@ class DeltaLog private (
}
object DeltaLog extends DeltaLogging {
+
@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
+ private class DeltaHadoopFsRelation(
+ location: FileIndex,
+ partitionSchema: StructType,
+ // The top-level columns in `dataSchema` should match the actual physical file schema, otherwise
+ // the ORC data source may not work with the by-ordinal mode.
+ dataSchema: StructType,
+ bucketSpec: Option[BucketSpec],
+ fileFormat: FileFormat,
+ options: Map[String, String])(sparkSession: SparkSession, deltaLog: DeltaLog)
+ extends HadoopFsRelation(
+ location,
+ partitionSchema,
+ dataSchema,
+ bucketSpec,
+ fileFormat,
+ options)(sparkSession)
+ with InsertableRelation {
+ def insert(data: DataFrame, overwrite: Boolean): Unit = {
+ val mode = if (overwrite) SaveMode.Overwrite else SaveMode.Append
+ WriteIntoDelta(
+ deltaLog = deltaLog,
+ mode = mode,
+ new DeltaOptions(Map.empty[String, String], sparkSession.sessionState.conf),
+ partitionColumns = Seq.empty,
+ configuration = Map.empty,
+ data = data
+ ).run(sparkSession)
+ }
+ }
/**
* The key type of `DeltaLog` cache. It's a pair of the canonicalized table path and the file
diff --git
a/gluten-core/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
b/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
similarity index 94%
rename from
gluten-core/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
rename to
backends-clickhouse/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
index 06ce8fe0f..bf1cbe4a8 100644
---
a/gluten-core/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/spark/shuffle/HashPartitioningWrapper.scala
@@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.plans.physical.HashPartitioning
// A wrapper for HashPartitioning to remain original hash expressions.
// Only used by CH backend when shuffle hash expressions contains non-field expression.
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
class HashPartitioningWrapper(
original: Seq[Expression],
newExpr: Seq[Expression],
diff --git
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
index 8c09ae7c7..1107c6a2e 100644
---
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/catalog/ClickHouseTableV2.scala
@@ -39,6 +39,7 @@ import java.{util => ju}
import scala.collection.JavaConverters._
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
class ClickHouseTableV2(
override val spark: SparkSession,
override val path: Path,
@@ -268,6 +269,7 @@ class ClickHouseTableV2(
}
}
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
class TempClickHouseTableV2(
override val spark: SparkSession,
override val catalogTable: Option[CatalogTable] = None)
diff --git
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/metadata/AddFileTags.scala
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/metadata/AddFileTags.scala
index bdb3a30e9..0680663eb 100644
---
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/metadata/AddFileTags.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/datasources/v2/clickhouse/metadata/AddFileTags.scala
@@ -28,6 +28,7 @@ import java.util.{List => JList}
import scala.collection.JavaConverters._
import scala.collection.mutable.ArrayBuffer
+@SuppressWarnings(Array("io.github.zhztheplayer.scalawarts.InheritFromCaseClass"))
class AddMergeTreeParts(
val database: String,
val table: String,
diff --git a/pom.xml b/pom.xml
index dbf46ac17..0f37bcbf1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -615,13 +615,27 @@
<artifactId>scala-maven-plugin</artifactId>
<version>${scala.compiler.version}</version>
<configuration>
+ <compilerPlugins>
+ <compilerPlugin>
+ <groupId>org.wartremover</groupId>
+ <artifactId>wartremover_${scala.binary.version}</artifactId>
+ <version>3.1.6</version>
+ </compilerPlugin>
+ </compilerPlugins>
+ <dependencies>
+ <dependency>
+ <groupId>io.github.zhztheplayer.scalawarts</groupId>
+ <artifactId>scalawarts</artifactId>
+ <version>0.1.1</version>
+ </dependency>
+ </dependencies>
<recompileMode>${scala.recompile.mode}</recompileMode>
<args>
- <arg>-Wconf:msg=While parsing annotations in:silent</arg>
+ <arg>-Wconf:msg=While parsing annotations in:silent,any:e</arg>
<arg>-Ywarn-unused:imports</arg>
- <arg>-Xfatal-warnings</arg>
<arg>-deprecation</arg>
<arg>-feature</arg>
+ <arg>-P:wartremover:traverser:io.github.zhztheplayer.scalawarts.InheritFromCaseClass</arg>
</args>
</configuration>
<executions>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]