xiarixiaoyao commented on a change in pull request #4910:
URL: https://github.com/apache/hudi/pull/4910#discussion_r833491559
##########
File path:
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/DefaultSource.scala
##########
@@ -107,8 +113,21 @@ class DefaultSource extends RelationProvider
case (COPY_ON_WRITE, QUERY_TYPE_SNAPSHOT_OPT_VAL, false) |
(COPY_ON_WRITE, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, false) |
(MERGE_ON_READ, QUERY_TYPE_READ_OPTIMIZED_OPT_VAL, false) =>
- new BaseFileOnlyRelation(sqlContext, metaClient, parameters,
userSchema, globPaths)
-
+ val internalSchema = new
TableSchemaResolver(metaClient).getTableInternalSchemaFromCommitMetadata
+ val sparkSchema = SchemaConverters.toSqlType(new
TableSchemaResolver(metaClient).getTableAvroSchema).dataType.asInstanceOf[StructType]
+ val newParameters = parameters ++
Map(SparkInternalSchemaConverter.HOODIE_QUERY_SCHEMA ->
SerDeHelper.toJson(internalSchema.orElse(null)),
+ SparkInternalSchemaConverter.HOODIE_TABLE_PATH ->
metaClient.getBasePath)
+ if (internalSchema.isPresent) {
+ // Use the HoodieFileIndex only if the 'path' is not globbed.
+ // Or else we use the original way to read hoodie table.
Review comment:
This is a historical issue: HoodieFileIndex is only available when the 'path'
is not globbed; the same code can be found in Hudi 0.9.0.
Maybe we can fix this in another PR.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]