[
https://issues.apache.org/jira/browse/HUDI-2005?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17408885#comment-17408885
]
sivabalan narayanan edited comment on HUDI-2005 at 9/2/21, 2:39 PM:
--------------------------------------------------------------------
{code:java}
grep -irl ".getFileStatus" hudi-*/* | grep -v Test | grep .java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/AbstractMarkerBasedRollbackStrategy.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/heartbeat/HoodieHeartbeatClient.java
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/CompactionAdminClient.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/io/FlinkMergeHandle.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/rollback/FlinkMarkerBasedRollbackStrategy.java
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackHelper.java
hudi-client/hudi-java-client/target/classes/org/apache/hudi/table/action/rollback/JavaListingBasedRollbackHelper.class
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/rollback/JavaListingBasedRollbackHelper.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/rollback/SparkMarkerBasedRollbackStrategy.java
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackHelper.java
hudi-common/src/main/java/org/apache/hudi/common/util/TablePathUtils.java
hudi-common/src/main/java/org/apache/hudi/common/table/log/HoodieLogFileReader.java
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/dto/BaseFileDTO.java
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/dto/LogFileDTO.java
hudi-common/src/main/java/org/apache/hudi/common/model/HoodieLogFile.java
hudi-common/src/main/java/org/apache/hudi/common/model/BaseFile.java
hudi-common/src/main/java/org/apache/hudi/common/fs/HoodieWrapperFileSystem.java
hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InLineFileSystem.java
hudi-common/src/main/java/org/apache/hudi/common/fs/inline/InMemoryFileSystem.java
hudi-common/src/main/java/org/apache/hudi/common/fs/FailSafeConsistencyGuard.java
hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java
hudi-common/src/main/java/org/apache/hudi/exception/TableNotFoundException.java
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataMetrics.java
hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieMetadataPayload.java
hudi-flink/src/main/java/org/apache/hudi/sink/bootstrap/BootstrapOperator.java
hudi-flink/src/main/java/org/apache/hudi/sink/partitioner/profile/WriteProfiles.java
hudi-flink/src/main/java/org/apache/hudi/table/HoodieTableSource.java
hudi-flink/src/main/java/org/apache/hudi/table/format/FilePathUtils.java
hudi-flink/src/main/java/org/apache/hudi/table/format/cow/CopyOnWriteInputFormat.java
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/utils/HoodieInputFormatUtils.java
hudi-hadoop-mr/src/main/java/org/apache/hudi/hadoop/hive/HoodieCombineHiveInputFormat.java
hudi-integ-test/src/main/java/org/apache/hudi/integ/testsuite/reader/DFSDeltaInputReader.java
hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java
{code}
1. ListingBasedRollbackHelper.
{code:java}
// collect all log files that is supposed to be deleted with this rollback
Map<FileStatus, Long> writtenLogFileSizeMap =
FSUtils.getAllLogFiles(metaClient.getFs(),
FSUtils.getPartitionPath(config.getBasePath(),
rollbackRequest.getPartitionPath()),
fileId, HoodieFileFormat.HOODIE_LOG.getFileExtension(), latestBaseInstant)
.collect(Collectors.toMap(HoodieLogFile::getFileStatus, value ->
value.getFileStatus().getLen()));{code}
{code:java}
// This step is intentionally done after writer is closed. Guarantees that
// getFileStatus would reflect correct stats and FileNotFoundException is not
// thrown in cloud-storage : HUDI-168
Map<FileStatus, Long> filesToNumBlocksRollback = Collections.singletonMap(
metaClient.getFs().getFileStatus(Objects.requireNonNull(writer).getLogFile().getPath()),
1L
);
{code}
2. SparkMarkerBasedRollbackStrategy
{code:java}
protected Map<FileStatus, Long> getWrittenLogFileSizeMap(String
partitionPathStr, String baseCommitTime, String fileId) throws IOException {
// collect all log files that is supposed to be deleted with this rollback
return FSUtils.getAllLogFiles(table.getMetaClient().getFs(),
FSUtils.getPartitionPath(config.getBasePath(), partitionPathStr), fileId,
HoodieFileFormat.HOODIE_LOG.getFileExtension(), baseCommitTime)
.collect(Collectors.toMap(HoodieLogFile::getFileStatus, value ->
value.getFileStatus().getLen()));
}
{code}
3. HoodieLogFileReader fetches the log file position using:
{code:java}
if (this.reverseReader) {
this.reverseLogFilePosition = this.lastReverseLogFilePosition =
fs.getFileStatus(logFile.getPath()).getLen();
}
{code}
As of now, HoodieLogFileReader only has access to a FileSystem. Not sure if we
can leak Metadata to this layer.
4. HoodieWrapperFileSystem
{code:java}
@Override
public FileStatus getFileStatus(Path f) throws IOException {
return executeFuncWithTimeMetrics(MetricName.getFileStatus.name(), f, () -> {
try {
consistencyGuard.waitTillFileAppears(convertToDefaultPath(f));
} catch (TimeoutException e) {
// pass
}
return fileSystem.getFileStatus(convertToDefaultPath(f));
});
}{code}
5. FSUtils. All write handles use this to fetch the file size. Should be fine.
{code:java}
public static long getFileSize(FileSystem fs, Path path) throws IOException {
return fs.getFileStatus(path).getLen();
}
{code}
was (Author: shivnarayan):
1. ListingBasedRollbackHelper.
{code:java}
// collect all log files that is supposed to be deleted with this rollback
Map<FileStatus, Long> writtenLogFileSizeMap =
FSUtils.getAllLogFiles(metaClient.getFs(),
FSUtils.getPartitionPath(config.getBasePath(),
rollbackRequest.getPartitionPath()),
fileId, HoodieFileFormat.HOODIE_LOG.getFileExtension(), latestBaseInstant)
.collect(Collectors.toMap(HoodieLogFile::getFileStatus, value ->
value.getFileStatus().getLen()));{code}
{code:java}
// This step is intentionally done after writer is closed. Guarantees that
// getFileStatus would reflect correct stats and FileNotFoundException is not
// thrown in cloud-storage : HUDI-168
Map<FileStatus, Long> filesToNumBlocksRollback = Collections.singletonMap(
metaClient.getFs().getFileStatus(Objects.requireNonNull(writer).getLogFile().getPath()),
1L
);
{code}
2. SparkMarkerBasedRollbackStrategy
{code:java}
protected Map<FileStatus, Long> getWrittenLogFileSizeMap(String
partitionPathStr, String baseCommitTime, String fileId) throws IOException {
// collect all log files that is supposed to be deleted with this rollback
return FSUtils.getAllLogFiles(table.getMetaClient().getFs(),
FSUtils.getPartitionPath(config.getBasePath(), partitionPathStr), fileId,
HoodieFileFormat.HOODIE_LOG.getFileExtension(), baseCommitTime)
.collect(Collectors.toMap(HoodieLogFile::getFileStatus, value ->
value.getFileStatus().getLen()));
}
{code}
3. HoodieLogFileReader fetches the log file position using:
{code:java}
if (this.reverseReader) {
this.reverseLogFilePosition = this.lastReverseLogFilePosition =
fs.getFileStatus(logFile.getPath()).getLen();
}
{code}
As of now, HoodieLogFileReader only has access to a FileSystem. Not sure if we
can leak Metadata to this layer.
> Audit and remove references of fs.listStatus() and fs.getFileStatus() or
> fs.exists()
> ------------------------------------------------------------------------------------
>
> Key: HUDI-2005
> URL: https://issues.apache.org/jira/browse/HUDI-2005
> Project: Apache Hudi
> Issue Type: Sub-task
> Reporter: Nishith Agarwal
> Assignee: sivabalan narayanan
> Priority: Major
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)