vinothchandar commented on code in PR #5352:
URL: https://github.com/apache/hudi/pull/5352#discussion_r852467806
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala:
##########
@@ -206,4 +208,32 @@ class DefaultSource extends RelationProvider
parameters: Map[String, String]): Source = {
new HoodieStreamSource(sqlContext, metadataPath, schema, parameters)
}
+
+ private def resolveBaseFileOnlyRelation(sqlContext: SQLContext,
+ globPaths: Seq[Path],
+ userSchema: Option[StructType],
+ metaClient: HoodieTableMetaClient,
+ optParams: Map[String, String]) = {
+ val baseRelation = new BaseFileOnlyRelation(sqlContext, metaClient,
optParams, userSchema, globPaths)
+ val enableSchemaOnRead: Boolean =
!tryFetchInternalSchema(metaClient).isEmptySchema
+
+ // NOTE: We fallback to [[HadoopFsRelation]] in all of the cases except
ones requiring usage of
+ // [[BaseFileOnlyRelation]] to function correctly. This is necessary
to maintain performance parity w/
+ // vanilla Spark, since some of the Spark optimizations are
predicated on the using of [[HadoopFsRelation]].
+ //
+ // You can check out HUDI-3896 for more details
+ if (enableSchemaOnRead) {
+ baseRelation
+ } else {
+ baseRelation.toHadoopFsRelation
+ }
+ }
+
+ private def tryFetchInternalSchema(metaClient: HoodieTableMetaClient) =
Review Comment:
if we write with schema evolution on, and then turn it off, the table may not be
readable. So this does not apply here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]