linliu-code commented on code in PR #10144:
URL: https://github.com/apache/hudi/pull/10144#discussion_r1411402217
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieFileGroupReaderBasedParquetFileFormat.scala:
##########
@@ -162,31 +167,35 @@ class
HoodieFileGroupReaderBasedParquetFileFormat(tableState: HoodieTableState,
// CDC queries.
case hoodiePartitionCDCFileGroupSliceMapping:
HoodiePartitionCDCFileGroupMapping =>
val filePath: Path =
sparkAdapter.getSparkPartitionedFileUtils.getPathFromPartitionedFile(file)
- val fileGroupId: HoodieFileGroupId = new
HoodieFileGroupId(filePath.getParent.toString, filePath.getName)
+ val nullPartitionPath =
filePath.getParent.toString.equals("null_partition_path")
+ val fileGroupId: HoodieFileGroupId = new HoodieFileGroupId(
+ if (nullPartitionPath) "" else filePath.getParent.toString,
filePath.getName)
val fileSplits =
hoodiePartitionCDCFileGroupSliceMapping.getFileSplitsFor(fileGroupId).get.toArray
val fileGroupSplit: HoodieCDCFileGroupSplit =
HoodieCDCFileGroupSplit(fileSplits)
- buildCDCRecordIterator(fileGroupSplit, preMergeBaseFileReader,
broadcastedHadoopConf.value.value, requiredSchema, props)
+ if (cdcFileReader.isEmpty) {
+ throw new HoodieException("CDC file reader cannot be null")
+ }
+ buildCDCRecordIterator(fileGroupSplit, cdcFileReader.get,
broadcastedHadoopConf.value.value, props)
// TODO: Use FileGroupReader here: HUDI-6942.
case _ => baseFileReader(file)
}
}
}
protected def buildCDCRecordIterator(cdcFileGroupSplit:
HoodieCDCFileGroupSplit,
- preMergeBaseFileReader: PartitionedFile
=> Iterator[InternalRow],
+ cdcFileReader: PartitionedFile =>
Iterator[InternalRow],
hadoopConf: Configuration,
- requiredSchema: StructType,
props: TypedProperties):
Iterator[InternalRow] = {
- val metaClient =
HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf,
tableState.tablePath, props)
+ props.setProperty("hoodie.table.name", tableName)
Review Comment:
Will import the config key constant here instead of hard-coding the string "hoodie.table.name".
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]