nsivabalan commented on code in PR #12105:
URL: https://github.com/apache/hudi/pull/12105#discussion_r1802117548
##########
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java:
##########
@@ -1242,27 +1243,27 @@ private static Stream<HoodieRecord>
getColumnStatsRecords(String partitionPath,
}
List<HoodieColumnRangeMetadata<Comparable>> columnRangeMetadata =
- readColumnRangeMetadataFrom(filePartitionPath, datasetMetaClient,
columnsToIndex, false, Option.empty());
+ readColumnRangeMetadataFrom(filePartitionPath, datasetMetaClient,
columnsToIndex, fetchStatsForLogFiles, writerSchemaOpt, maxBufferSize);
return HoodieMetadataPayload.createColumnStatsRecords(partitionPath,
columnRangeMetadata, false);
}
private static List<HoodieColumnRangeMetadata<Comparable>>
readColumnRangeMetadataFrom(String filePath,
HoodieTableMetaClient datasetMetaClient,
List<String> columnsToIndex,
-
boolean shouldReadColumnStatsForLogFiles,
-
Option<Schema> writerSchemaOpt) {
+
boolean fetchStatsForLogFiles,
+
Option<Schema> writerSchemaOpt,
+
int maxBufferSize) {
try {
StoragePath fullFilePath = new
StoragePath(datasetMetaClient.getBasePath(), filePath);
if (filePath.endsWith(HoodieFileFormat.PARQUET.getFileExtension())) {
return HoodieIOFactory.getIOFactory(datasetMetaClient.getStorage())
.getFileFormatUtils(HoodieFileFormat.PARQUET)
.readColumnStatsFromMetadata(datasetMetaClient.getStorage(),
fullFilePath, columnsToIndex);
- } else if (FSUtils.isLogFile(fullFilePath) &&
shouldReadColumnStatsForLogFiles) {
- LOG.warn("Reading log file: {}, to build column range metadata.",
fullFilePath);
- return getLogFileColumnRangeMetadata(fullFilePath.toString(),
datasetMetaClient, columnsToIndex, writerSchemaOpt);
+ } else if
(FSUtils.isLogFile(filePath.substring(filePath.lastIndexOf("/") + 1)) &&
fetchStatsForLogFiles) {
+ LOG.warn("Reading log file: {}, to build column range metadata.",
filePath);
+ return getLogFileColumnRangeMetadata(new
StoragePath(datasetMetaClient.getBasePath(), filePath).toString(),
datasetMetaClient, columnsToIndex, writerSchemaOpt, maxBufferSize);
Review Comment:
We could do better with variable naming.
For example, the input argument to this method (the `filePath` variable) is the
file name together with its partition path, i.e. a path relative to the base path.
But the unmerged log record reader needs the full (absolute) path.
I have made some variable naming fixes in my latest commit; you can check it
out.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: commits-unsubscribe@hudi.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org