This is an automated email from the ASF dual-hosted git repository. danny0405 pushed a commit to branch release-0.10.0 in repository https://gitbox.apache.org/repos/asf/hudi.git
commit a49e5ac7160a5937ebfe2f72deb36dc0ab119f2e Author: Manoj Govindassamy <[email protected]> AuthorDate: Fri Dec 3 11:18:10 2021 -0800 [HUDI-2894][HUDI-2905] Metadata table - avoiding key lookup failures on base files over S3 (#4185) - Fetching partition files or all partitions from the metadata table fails when run over S3. The metadata table uses the HFile format for its base files, and record lookup uses the HFile.Reader and HFileScanner interfaces to get records by partition keys. When the backing storage is S3, this record lookup from HFiles fails with an IOException, in turn failing the caller's commit/update operations. - The metadata table looks up HFile records with positional read enabled so as to perform better for random lookups. However, over S3 this positional-read key lookup returns partial reads, causing the HFile scanner to throw an IOException. This doesn't happen over HDFS. Although the metadata table uses HFiles for random key lookups, positional read is not mandatory because we sort the keys when looking up multiple keys. - The fix is to disable HFile positional read for all HFile-scanner-based key lookups. 
(cherry picked from commit 383d5edc169b79c4022f81a1580ab1bc2afebc30) --- .../apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java | 8 ++++---- .../main/java/org/apache/hudi/io/storage/HoodieHFileReader.java | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java index d4a77b0..3700d01 100644 --- a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java +++ b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java @@ -291,13 +291,13 @@ public class HFileBootstrapIndex extends BootstrapIndex { @Override public List<String> getIndexedPartitionPaths() { - HFileScanner scanner = partitionIndexReader().getScanner(true, true); + HFileScanner scanner = partitionIndexReader().getScanner(true, false); return getAllKeys(scanner, HFileBootstrapIndex::getPartitionFromKey); } @Override public List<HoodieFileGroupId> getIndexedFileGroupIds() { - HFileScanner scanner = fileIdIndexReader().getScanner(true, true); + HFileScanner scanner = fileIdIndexReader().getScanner(true, false); return getAllKeys(scanner, HFileBootstrapIndex::getFileGroupFromKey); } @@ -319,7 +319,7 @@ public class HFileBootstrapIndex extends BootstrapIndex { @Override public List<BootstrapFileMapping> getSourceFileMappingForPartition(String partition) { try { - HFileScanner scanner = partitionIndexReader().getScanner(true, true); + HFileScanner scanner = partitionIndexReader().getScanner(true, false); KeyValue keyValue = new KeyValue(Bytes.toBytes(getPartitionKey(partition)), new byte[0], new byte[0], HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put, new byte[0]); if (scanner.seekTo(keyValue) == 0) { @@ -352,7 +352,7 @@ public class HFileBootstrapIndex extends BootstrapIndex { List<HoodieFileGroupId> fileGroupIds = new ArrayList<>(ids); 
Collections.sort(fileGroupIds); try { - HFileScanner scanner = fileIdIndexReader().getScanner(true, true); + HFileScanner scanner = fileIdIndexReader().getScanner(true, false); for (HoodieFileGroupId fileGroupId : fileGroupIds) { KeyValue keyValue = new KeyValue(Bytes.toBytes(getFileGroupKey(fileGroupId)), new byte[0], new byte[0], HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put, new byte[0]); diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java index 7b80d1a..e3e38ec 100644 --- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java +++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java @@ -246,7 +246,7 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea synchronized (this) { if (keyScanner == null) { - keyScanner = reader.getScanner(false, true); + keyScanner = reader.getScanner(false, false); } if (keyScanner.seekTo(kv) == 0) {
