yihua commented on code in PR #11692:
URL: https://github.com/apache/hudi/pull/11692#discussion_r1717619959
##########
hudi-spark-datasource/hudi-spark3.5.x/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/Spark35LegacyHoodieParquetFileFormat.scala:
##########
@@ -186,35 +185,20 @@ class Spark35LegacyHoodieParquetFileFormat(private val
shouldAppendPartitionValu
// Try to push down filters when filter push-down is enabled.
val pushed = if (enableParquetFilterPushDown) {
val parquetSchema = footerFileMetaData.getSchema
- val parquetFilters = if (HoodieSparkUtils.gteqSpark3_2_1) {
- // NOTE: Below code could only be compiled against >= Spark 3.2.1,
- // and unfortunately won't compile against Spark 3.2.0
- // However this code is runtime-compatible w/ both Spark 3.2.0
and >= Spark 3.2.1
- val datetimeRebaseSpec =
-
DataSourceUtils.datetimeRebaseSpec(footerFileMetaData.getKeyValueMetaData.get,
datetimeRebaseModeInRead)
- new ParquetFilters(
- parquetSchema,
- pushDownDate,
- pushDownTimestamp,
- pushDownDecimal,
- pushDownStringStartWith,
- pushDownInFilterThreshold,
- isCaseSensitive,
- datetimeRebaseSpec)
- } else {
- // Spark 3.2.0
- val datetimeRebaseMode =
-
Spark35DataSourceUtils.datetimeRebaseMode(footerFileMetaData.getKeyValueMetaData.get,
datetimeRebaseModeInRead)
- createParquetFilters(
- parquetSchema,
- pushDownDate,
- pushDownTimestamp,
- pushDownDecimal,
- pushDownStringStartWith,
- pushDownInFilterThreshold,
- isCaseSensitive,
- datetimeRebaseMode)
- }
+ // NOTE: Below code could only be compiled against >= Spark 3.2.1,
+ // and unfortunately won't compile against Spark 3.2.0
+ // However this code is runtime-compatible w/ both Spark 3.2.0
and >= Spark 3.2.1
Review Comment:
This note can be removed.
##########
pom.xml:
##########
@@ -163,18 +163,14 @@
<flink.connector.kafka.artifactId>flink-connector-kafka</flink.connector.kafka.artifactId>
<flink.hadoop.compatibility.artifactId>flink-hadoop-compatibility_2.12</flink.hadoop.compatibility.artifactId>
<rocksdbjni.version>7.5.3</rocksdbjni.version>
- <spark30.version>3.0.2</spark30.version>
- <spark31.version>3.1.3</spark31.version>
- <spark32.version>3.2.3</spark32.version>
<spark33.version>3.3.4</spark33.version>
<spark34.version>3.4.3</spark34.version>
<spark35.version>3.5.1</spark35.version>
- <hudi.spark.module>hudi-spark3.2.x</hudi.spark.module>
+ <hudi.spark.module>hudi-spark3.5.x</hudi.spark.module>
<!-- NOTE: Different Spark versions might require different number of
shared
modules being incorporated, hence we're creating multiple
placeholders
(hudi.spark.common.modules.*) -->
<hudi.spark.common.modules.1>hudi-spark3-common</hudi.spark.common.modules.1>
Review Comment:
```suggestion
<hudi.spark.common.module>hudi-spark3-common</hudi.spark.common.module>
```
##########
pom.xml:
##########
@@ -163,18 +163,14 @@
<flink.connector.kafka.artifactId>flink-connector-kafka</flink.connector.kafka.artifactId>
<flink.hadoop.compatibility.artifactId>flink-hadoop-compatibility_2.12</flink.hadoop.compatibility.artifactId>
<rocksdbjni.version>7.5.3</rocksdbjni.version>
- <spark30.version>3.0.2</spark30.version>
- <spark31.version>3.1.3</spark31.version>
- <spark32.version>3.2.3</spark32.version>
<spark33.version>3.3.4</spark33.version>
<spark34.version>3.4.3</spark34.version>
<spark35.version>3.5.1</spark35.version>
- <hudi.spark.module>hudi-spark3.2.x</hudi.spark.module>
+ <hudi.spark.module>hudi-spark3.5.x</hudi.spark.module>
<!-- NOTE: Different Spark versions might require different number of
shared
modules being incorporated, hence we're creating multiple
placeholders
(hudi.spark.common.modules.*) -->
<hudi.spark.common.modules.1>hudi-spark3-common</hudi.spark.common.modules.1>
Review Comment:
The same applies to the other Spark profiles.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]