HuangZhenQiu commented on code in PR #18074:
URL: https://github.com/apache/hudi/pull/18074#discussion_r2767338113
##########
hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/source/HoodieSource.java:
##########
@@ -130,37 +141,76 @@ private SplitEnumerator<HoodieSourceSplit,
HoodieSplitEnumeratorState> createEnu
splitProvider = new DefaultHoodieSplitProvider(new
HoodieSourceSplitComparator());
} else {
LOG.info(
- "Hoodie source restored {} splits from state for table {}",
- enumeratorState.getPendingSplitStates().size(),
- metaClient.getTableConfig().getTableName());
+ "Hoodie source restored {} splits from state for table {}",
+ enumeratorState.getPendingSplitStates().size(),
+ metaClient.getTableConfig().getTableName());
List<HoodieSourceSplit> pendingSplits =
-
enumeratorState.getPendingSplitStates().stream().map(HoodieSourceSplitState::split).collect(Collectors.toList());
+
enumeratorState.getPendingSplitStates().stream().map(HoodieSourceSplitState::split).collect(Collectors.toList());
splitProvider = new DefaultHoodieSplitProvider();
splitProvider.onDiscoveredSplits(pendingSplits);
}
if (scanContext.isStreaming()) {
HoodieContinuousSplitDiscover discover = new DefaultHoodieSplitDiscover(
- scanContext, metaClient);
+ scanContext, metaClient);
return new HoodieContinuousSplitEnumerator(enumContext, splitProvider,
discover, scanContext, enumeratorState == null ? Option.empty() :
Option.of(enumeratorState));
} else {
if (enumeratorState == null) {
+ List<HoodieSourceSplit> splits = createBatchHoodieSplits();
+ splitProvider.onDiscoveredSplits(splits);
+ }
+ return new HoodieStaticSplitEnumerator(enumContext, splitProvider);
+ }
+ }
+
+ @VisibleForTesting
+ List<HoodieSourceSplit> createBatchHoodieSplits() {
+ final Configuration flinkConf = this.scanContext.getConf();
+ final String queryType = flinkConf.get(FlinkOptions.QUERY_TYPE);
+ switch (queryType) {
+ case FlinkOptions.QUERY_TYPE_SNAPSHOT:
+ final HoodieTableType tableType =
HoodieTableType.valueOf(flinkConf.get(FlinkOptions.TABLE_TYPE));
+ switch (tableType) {
+ case MERGE_ON_READ:
+ List<HoodieSourceSplit> splits =
SplitUtils.buildHoodieSplits(metaClient, flinkConf, createFileIndex());
+ if (splits.isEmpty()) {
+ // When there is no input splits, just return an empty source.
+ LOG.info("No input splits generate for MERGE_ON_READ input
format. Returning empty collection");
+ }
+ return splits;
+ case COPY_ON_WRITE:
+ return SplitUtils.baseFileOnlyHoodieSourceSplit(metaClient,
storagePath, createFileIndex(), flinkConf.get(FlinkOptions.MERGE_TYPE));
+ default:
+ throw new HoodieException("Unexpected table type: " +
flinkConf.get(FlinkOptions.TABLE_TYPE));
+ }
+ case FlinkOptions.QUERY_TYPE_READ_OPTIMIZED:
+ return SplitUtils.baseFileOnlyHoodieSourceSplit(metaClient,
storagePath, createFileIndex(), flinkConf.get(FlinkOptions.MERGE_TYPE));
+ case FlinkOptions.QUERY_TYPE_INCREMENTAL: {
// Only do scan planning if nothing is restored from checkpoint state
IncrementalInputSplits incrementalInputSplits =
IncrementalInputSplits.builder()
- .conf(scanContext.getConf())
- .path(scanContext.getPath())
- .rowType(scanContext.getRowType())
-
.maxCompactionMemoryInBytes(scanContext.getMaxCompactionMemoryInBytes())
- .skipCompaction(scanContext.skipCompaction())
- .skipClustering(scanContext.skipClustering())
- .skipInsertOverwrite(scanContext.skipInsertOverwrite()).build();
-
- HoodieContinuousSplitBatch batch =
incrementalInputSplits.inputHoodieSourceSplits(metaClient, null,
scanContext.cdcEnabled());
- splitProvider.onDiscoveredSplits(batch.getSplits());
+ .conf(scanContext.getConf())
+ .path(scanContext.getPath())
+ .rowType(scanContext.getRowType())
+
.maxCompactionMemoryInBytes(scanContext.getMaxCompactionMemoryInBytes())
+ .skipCompaction(scanContext.skipCompaction())
+ .skipClustering(scanContext.skipClustering())
+ .partitionPruner(scanContext.partitionPruner())
+
.skipInsertOverwrite(scanContext.skipInsertOverwrite()).build();
+ return new
ArrayList<>(incrementalInputSplits.inputHoodieSourceSplits(metaClient, null,
scanContext.cdcEnabled()).getSplits());
Review Comment:
Updated.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org