alexeykudinkin commented on code in PR #6550:
URL: https://github.com/apache/hudi/pull/6550#discussion_r967470284
##########
hudi-spark-datasource/hudi-spark/src/main/scala/org/apache/spark/sql/hudi/analysis/HoodieAnalysis.scala:
##########
@@ -79,10 +79,6 @@ object HoodieAnalysis {
val spark3Analysis: RuleBuilder =
session => ReflectionUtils.loadClass(spark3AnalysisClass,
session).asInstanceOf[Rule[LogicalPlan]]
- val spark3ResolveReferencesClass =
"org.apache.spark.sql.hudi.analysis.HoodieSpark3ResolveReferences"
Review Comment:
This class is actually deleted
##########
hudi-spark-datasource/hudi-spark3.1.x/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister:
##########
@@ -16,4 +16,4 @@
# limitations under the License.
-org.apache.hudi.Spark3xDefaultSource
\ No newline at end of file
+org.apache.hudi.Spark31DefaultSource
Review Comment:
We're using a two-digit nomenclature to designate the Spark version in class
names (`Spark31`, `Spark32`, etc.)
##########
hudi-spark-datasource/hudi-spark3.1.x/src/main/scala/org/apache/spark/sql/avro/HoodieSpark3_1AvroDeserializer.scala:
##########
@@ -18,12 +18,19 @@
package org.apache.spark.sql.avro
import org.apache.avro.Schema
+import org.apache.spark.sql.catalyst.NoopFilters
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.SQLConf.LegacyBehaviorPolicy
import org.apache.spark.sql.types.DataType
class HoodieSpark3_1AvroDeserializer(rootAvroType: Schema, rootCatalystType:
DataType)
extends HoodieAvroDeserializer {
- private val avroDeserializer = new AvroDeserializer(rootAvroType,
rootCatalystType)
+ private val avroDeserializer = {
+ val avroRebaseModeInRead =
LegacyBehaviorPolicy.withName(SQLConf.get.getConf(SQLConf.LEGACY_AVRO_REBASE_MODE_IN_READ))
Review Comment:
This is intentional. This was missed in the original fix (the author was likely
testing only against Spark 3.2).
##########
pom.xml:
##########
@@ -377,9 +377,17 @@
<exclude>org.sl4fj:slf4j-jcl</exclude>
<exclude>log4j:log4j</exclude>
<exclude>ch.qos.logback:logback-classic</exclude>
+ <!-- NOTE: We're banning any HBase deps versions other
than the approved ${hbase.version},
+ which is aimed at preventing the classpath
collisions w/ transitive deps -->
+ <exclude>org.apache.hbase:hbase-common:*</exclude>
Review Comment:
These should resolve issues for all Spark versions
##########
hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala:
##########
@@ -346,20 +346,25 @@ class HoodieCatalog extends DelegatingCatalogExtension
}
object HoodieCatalog {
- def convertTransforms(partitions: Seq[Transform]): (Seq[String],
Option[BucketSpec]) = {
+ def convertTransforms(transforms: Seq[Transform]): (Seq[String],
Option[BucketSpec]) = {
val identityCols = new mutable.ArrayBuffer[String]
var bucketSpec = Option.empty[BucketSpec]
- partitions.map {
+ transforms.foreach {
case IdentityTransform(FieldReference(Seq(col))) =>
identityCols += col
+ case MatchBucketTransform(numBuckets, col, sortCol) =>
Review Comment:
This is a fix for `BucketTransform`, which changed between Spark 3.2 and 3.3
##########
packaging/hudi-spark-bundle/pom.xml:
##########
@@ -140,14 +140,14 @@
<!-- NOTE: We have to relocate all classes w/in
org.apache.spark.sql.avro to avoid
potential classpath collisions in case users would
like to also use "spark-avro" w/in
their runtime, since Hudi carries some of the same
classes as "spark-avro" -->
- <relocation>
- <pattern>javax.servlet.</pattern>
- <shadedPattern>org.apache.hudi.javax.servlet.</shadedPattern>
- </relocation>
<relocation>
<pattern>org.apache.spark.sql.avro.</pattern>
<shadedPattern>org.apache.hudi.org.apache.spark.sql.avro.</shadedPattern>
</relocation>
+ <relocation>
+ <pattern>javax.servlet.</pattern>
+ <shadedPattern>org.apache.hudi.javax.servlet.</shadedPattern>
+ </relocation>
Review Comment:
These lines were inserted between the relocation above and the comment attached
to it, separating the comment from the block it documents
##########
hudi-spark-datasource/hudi-spark3.2plus-common/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieStagedTable.scala:
##########
@@ -29,7 +30,7 @@ import org.apache.spark.sql.types.StructType
import java.net.URI
import java.util
-import scala.collection.JavaConverters.{mapAsScalaMapConverter,
setAsJavaSetConverter}
+import scala.jdk.CollectionConverters.{mapAsScalaMapConverter,
setAsJavaSetConverter}
Review Comment:
`scala.jdk.CollectionConverters` is only available from Scala 2.13 onward; on
Scala 2.11/2.12 the equivalent API is `scala.collection.JavaConverters`
##########
pom.xml:
##########
@@ -377,9 +377,17 @@
<exclude>org.sl4fj:slf4j-jcl</exclude>
<exclude>log4j:log4j</exclude>
<exclude>ch.qos.logback:logback-classic</exclude>
+ <!-- NOTE: We're banning any HBase deps versions other
than the approved ${hbase.version},
+ which is aimed at preventing the classpath
collisions w/ transitive deps -->
+ <exclude>org.apache.hbase:hbase-common:*</exclude>
+ <exclude>org.apache.hbase:hbase-client:*</exclude>
+ <exclude>org.apache.hbase:hbase-server:*</exclude>
</excludes>
<includes>
<include>org.slf4j:slf4j-simple:*:*:test</include>
+
<exclude>org.apache.hbase:hbase-common:${hbase.version}</exclude>
+
<exclude>org.apache.hbase:hbase-client:${hbase.version}</exclude>
+
<exclude>org.apache.hbase:hbase-server:${hbase.version}</exclude>
</includes>
Review Comment:
Good catch! I'm not actually sure how Maven handles these, though (I checked
the enforcer plugin and it worked)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]