alexeykudinkin commented on code in PR #6550:
URL: https://github.com/apache/hudi/pull/6550#discussion_r967470284


##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala:
##########
@@ -79,10 +79,6 @@ object HoodieAnalysis {
       val spark3Analysis: RuleBuilder =
         session => ReflectionUtils.loadClass(spark3AnalysisClass, 
session).asInstanceOf[Rule[LogicalPlan]]
 
-      val spark3ResolveReferencesClass = 
"org.apache.spark.sql.hudi.analysis.HoodieSpark3ResolveReferences"

Review Comment:
   This class has actually been deleted



##########
hudi-spark-datasource/hudi-spark3.1.x/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister:
##########
@@ -16,4 +16,4 @@
 # limitations under the License.
 
 
-org.apache.hudi.Spark3xDefaultSource
\ No newline at end of file
+org.apache.hudi.Spark31DefaultSource

Review Comment:
   We're using a two-digit nomenclature to designate the Spark version in class names 
(`Spark31`, `Spark32`, etc.)



##########
hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_1AvroDeserializer.scala:
##########
@@ -18,12 +18,19 @@
 package org.apache.spark.sql.avro
 
 import org.apache.avro.Schema
+import org.apache.spark.sql.catalyst.NoopFilters
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
 import org.apache.spark.sql.types.DataType
 
 class HoodieSpark3_1AvroDeserializer(rootAvroType: Schema, rootCatalystType: 
DataType)
   extends HoodieAvroDeserializer {
 
-  private val avroDeserializer = new AvroDeserializer(rootAvroType, 
rootCatalystType)
+  private val avroDeserializer = {
+    val avroRebaseModeInRead = 
LegacyBehaviorPolicy.withName(SQLConf.get.getConf(SQLConf.LEGACY_AVRO_REBASE_MODE_IN_READ))

Review Comment:
   This is intentional. This was missed in the original fix (the author was likely 
testing against Spark 3.2 only)



##########
pom.xml:
##########
@@ -377,9 +377,17 @@
                     <exclude>org.sl4fj:slf4j-jcl</exclude>
                     <exclude>log4j:log4j</exclude>
                     <exclude>ch.qos.logback:logback-classic</exclude>
+                    <!-- NOTE: We're banning any HBase deps versions other 
than the approved ${hbase.version},
+                               which is aimed at preventing the classpath 
collisions w/ transitive deps usually) -->
+                    <exclude>org.apache.hbase:hbase-common:*</exclude>

Review Comment:
   These should resolve issues for all Spark versions



##########
hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala:
##########
@@ -346,20 +346,25 @@ class HoodieCatalog extends DelegatingCatalogExtension
 }
 
 object HoodieCatalog {
-  def convertTransforms(partitions: Seq[Transform]): (Seq[String], 
Option[BucketSpec]) = {
+  def convertTransforms(transforms: Seq[Transform]): (Seq[String], 
Option[BucketSpec]) = {
     val identityCols = new mutable.ArrayBuffer[String]
     var bucketSpec = Option.empty[BucketSpec]
 
-    partitions.map {
+    transforms.foreach {
       case IdentityTransform(FieldReference(Seq(col))) =>
         identityCols += col
 
+      case MatchBucketTransform(numBuckets, col, sortCol) =>

Review Comment:
   This is a fix for `BucketTransform`, which changed between Spark 3.2 and 3.3



##########
packaging/hudi-spark-bundle/pom.xml:
##########
@@ -140,14 +140,14 @@
                 <!-- NOTE: We have to relocate all classes w/in 
org.apache.spark.sql.avro to avoid
                            potential classpath collisions in case users would 
like to also use "spark-avro" w/in
                            their runtime, since Hudi carries some of the same 
classes as "spark-avro" -->
-                <relocation>
-                  <pattern>javax.servlet.</pattern>
-                  <shadedPattern>org.apache.hudi.javax.servlet.</shadedPattern>
-                </relocation>
                 <relocation>
                   <pattern>org.apache.spark.sql.avro.</pattern>
                   
<shadedPattern>org.apache.hudi.org.apache.spark.sql.avro.</shadedPattern>
                 </relocation>
+                <relocation>
+                  <pattern>javax.servlet.</pattern>
+                  <shadedPattern>org.apache.hudi.javax.servlet.</shadedPattern>
+                </relocation>

Review Comment:
   These lines were inserted between the ones above and the comment attached to 
them, messing it up



##########
hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieStagedTable.scala:
##########
@@ -29,7 +30,7 @@ import org.apache.spark.sql.types.StructType
 
 import java.net.URI
 import java.util
-import scala.collection.JavaConverters.{mapAsScalaMapConverter, 
setAsJavaSetConverter}
+import scala.jdk.CollectionConverters.{mapAsScalaMapConverter, 
setAsJavaSetConverter}

Review Comment:
   `JavaConverters` are not available in Scala 2.11



##########
pom.xml:
##########
@@ -377,9 +377,17 @@
                     <exclude>org.sl4fj:slf4j-jcl</exclude>
                     <exclude>log4j:log4j</exclude>
                     <exclude>ch.qos.logback:logback-classic</exclude>
+                    <!-- NOTE: We're banning any HBase deps versions other 
than the approved ${hbase.version},
+                               which is aimed at preventing the classpath 
collisions w/ transitive deps usually) -->
+                    <exclude>org.apache.hbase:hbase-common:*</exclude>
+                    <exclude>org.apache.hbase:hbase-client:*</exclude>
+                    <exclude>org.apache.hbase:hbase-server:*</exclude>
                   </excludes>
                   <includes>
                     <include>org.slf4j:slf4j-simple:*:*:test</include>
+                    
<exclude>org.apache.hbase:hbase-common:${hbase.version}</exclude>
+                    
<exclude>org.apache.hbase:hbase-client:${hbase.version}</exclude>
+                    
<exclude>org.apache.hbase:hbase-server:${hbase.version}</exclude>
                   </includes>

Review Comment:
   Good catch! I'm not actually sure how Maven handles these, though (I 
checked the enforcer plugin and it worked)



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to