alexeykudinkin commented on code in PR #5428:
URL: https://github.com/apache/hudi/pull/5428#discussion_r924056799
##########
hudi-client/hudi-spark-client/src/main/scala/org/apache/hudi/HoodieSparkUtils.scala:
##########
@@ -44,25 +44,24 @@ import org.apache.hudi.avro.HoodieAvroUtils
import scala.collection.JavaConverters._
-object HoodieSparkUtils extends SparkAdapterSupport {
-
- def isSpark2: Boolean = SPARK_VERSION.startsWith("2.")
-
- def isSpark3: Boolean = SPARK_VERSION.startsWith("3.")
-
- def isSpark3_0: Boolean = SPARK_VERSION.startsWith("3.0")
-
- def isSpark3_1: Boolean = SPARK_VERSION.startsWith("3.1")
-
- def gteqSpark3_1: Boolean = SPARK_VERSION > "3.1"
-
- def gteqSpark3_1_3: Boolean = SPARK_VERSION >= "3.1.3"
-
- def isSpark3_2: Boolean = SPARK_VERSION.startsWith("3.2")
+private[hudi] trait SparkVersionsSupport {
+ def getSparkVersion: String
+
+ def isSpark2: Boolean = getSparkVersion.startsWith("2.")
+ def isSpark3: Boolean = getSparkVersion.startsWith("3.")
+ def isSpark3_0: Boolean = getSparkVersion.startsWith("3.0")
+ def isSpark3_1: Boolean = getSparkVersion.startsWith("3.1")
+ def isSpark3_2: Boolean = getSparkVersion.startsWith("3.2")
+
+ def gteqSpark3_1: Boolean = getSparkVersion >= "3.1"
Review Comment:
Yeah, it's just to resolve the cognitive dissonance: the method was called
GTEQ, but it was implemented as GT (`>`)
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala:
##########
@@ -187,23 +189,46 @@ private[sql] object SchemaConverters {
.values(toAvroType(vt, valueContainsNull, recordName, nameSpace))
case st: StructType =>
val childNameSpace = if (nameSpace != "") s"$nameSpace.$recordName"
else recordName
- val fieldsAssembler =
builder.record(recordName).namespace(nameSpace).fields()
- st.foreach { f =>
- val fieldAvroType =
- toAvroType(f.dataType, f.nullable, f.name, childNameSpace)
- fieldsAssembler.name(f.name).`type`(fieldAvroType).noDefault()
+ if (canBeUnion(st)) {
Review Comment:
This is our own. I'm not sure I understood your other question regarding
compatibility issues. Can you elaborate?
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala:
##########
@@ -109,6 +110,11 @@ class DefaultSource extends RelationProvider
Option(schema)
}
+ // NOTE: We have to handle explicitly case of the Metadata Table (MT)
since by default all of Hudi
+ // relations will try to apply schema pruning techniques (like
nested schema pruning) which couldn't
+ // be applied to MT
+ val canPruneRelationSchema = !isMetadataTable(tablePath)
Review Comment:
Correct. There's no point in applying it here
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]