linliu-code commented on code in PR #12384:
URL: https://github.com/apache/hudi/pull/12384#discussion_r1864747430
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/compact/HoodieCompactor.java:
##########
@@ -278,4 +368,123 @@ public CompactionExecutionHelper
getCompactionExecutionStrategy(HoodieCompaction
}
}
+ List<WriteStatus> compactWithFileGroupReader(HoodieReaderContext
readerContext,
+ String instantTime,
+ HoodieTableMetaClient
metaClient,
+ CompactionOperation operation,
+ List<HoodieLogFile> logFiles,
+ Schema readerSchema,
+ Option<InternalSchema>
internalSchemaOpt,
+ TypedProperties props,
+ HoodieWriteConfig config,
+ TaskContextSupplier
taskContextSupplier) throws IOException {
+ HoodieTimer timer = HoodieTimer.start();
+ timer.startTimer();
+ Option<HoodieBaseFile> baseFileOpt =
+ operation.getBaseFile(metaClient.getBasePath().toString(),
operation.getPartitionPath());
+ FileSlice fileSlice = new FileSlice(
+ operation.getFileGroupId(),
+ operation.getBaseInstantTime(),
+ baseFileOpt.isPresent() ? baseFileOpt.get() : null,
+ logFiles);
+
+ // 1. Generate the input for fg reader.
+ boolean usePosition =
config.getBooleanOrDefault(MERGE_USE_RECORD_POSITIONS);
+ HoodieFileGroupReader<T> fileGroupReader = new HoodieFileGroupReader<>(
+ readerContext,
+ metaClient.getStorage(),
+ metaClient.getBasePath().toString(),
+ instantTime,
+ fileSlice,
+ readerSchema,
+ readerSchema,
+ internalSchemaOpt,
+ metaClient,
+ props,
+ 0,
+ Long.MAX_VALUE,
+ usePosition);
+
+ // 2. Get the `HoodieFileGroupReaderIterator` from the fg reader.
+ fileGroupReader.initRecordIterators();
+ HoodieFileGroupReader.HoodieFileGroupReaderIterator<T> recordIterator
+ = fileGroupReader.getClosableIterator();
+
+ // 3. Write the record using parquet writer.
+ String writeToken = FSUtils.makeWriteToken(
Review Comment:
CC: @yihua
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]