nsivabalan commented on code in PR #12888:
URL: https://github.com/apache/hudi/pull/12888#discussion_r1978590066
##########
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java:
##########
@@ -527,19 +529,39 @@ public static String getTableBasePath(InputSplit split,
JobConf jobConf) throws
return realtimeSplit.getBasePath();
} else {
Path inputPath = ((FileSplit) split).getPath();
- FileSystem fs = inputPath.getFileSystem(jobConf);
- HoodieStorage storage = new HoodieHadoopStorage(fs);
- Option<StoragePath> tablePath = TablePathUtils.getTablePath(storage,
convertToStoragePath(inputPath));
- return tablePath.get().toString();
+ return getTablePath(jobConf, inputPath);
}
}
+ private static String getTablePath(JobConf jobConf, Path inputPath) throws
IOException {
+ FileSystem fs = inputPath.getFileSystem(jobConf);
+ HoodieStorage storage = new HoodieHadoopStorage(fs);
+ Option<StoragePath> tablePath = TablePathUtils.getTablePath(storage,
convertToStoragePath(inputPath));
+ return tablePath.get().toString();
+ }
+
/**
* `schema.on.read` and skip merge not implemented
*/
- public static boolean shouldUseFilegroupReader(final JobConf jobConf, final
InputSplit split) {
- return
jobConf.getBoolean(HoodieReaderConfig.FILE_GROUP_READER_ENABLED.key(),
HoodieReaderConfig.FILE_GROUP_READER_ENABLED.defaultValue())
- &&
!jobConf.getBoolean(HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE.key(),
HoodieCommonConfig.SCHEMA_EVOLUTION_ENABLE.defaultValue())
- && !(split instanceof BootstrapBaseFileSplit);
+ public static boolean shouldUseFilegroupReader(final JobConf jobConf, final
InputSplit split) throws IOException {
+ if (split instanceof FileSplit || split instanceof RealtimeSplit) {
+ HoodieTableMetaClient metaClient =
HoodieTableMetaClient.builder().setConf(getStorageConf(jobConf)).setBasePath(getTableBasePath(split,
jobConf)).build();
Review Comment:
Can you file a follow-up JIRA to get this reviewed by Ethan? For now, we can
go ahead and land this patch.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]