This is an automated email from the ASF dual-hosted git repository.

danny0405 pushed a commit to branch release-0.10.0
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit a49e5ac7160a5937ebfe2f72deb36dc0ab119f2e
Author: Manoj Govindassamy <[email protected]>
AuthorDate: Fri Dec 3 11:18:10 2021 -0800

    [HUDI-2894][HUDI-2905] Metadata table - avoiding key lookup failures on base files over S3 (#4185)
    
    - Fetching partition files or all partitions from the metadata table
       fails when run over S3. The metadata table uses the HFile format for
       its base files, and record lookup uses the HFile.Reader and
       HFileScanner interfaces to get records by partition keys. When the
       backing storage is S3, this record lookup from HFiles fails with an
       IOException, in turn failing the caller's commit/update operations.
    
     - The metadata table looks up HFile records with positional read enabled
       so as to perform better for random lookups. But this positional-read
       key lookup returns partial read sizes over S3, leading to the HFile
       scanner throwing an IOException. This doesn't happen over HDFS.
       Although the metadata table uses the HFile for random key lookups,
       positional read is not mandatory, because we sort the keys when doing
       a lookup for multiple keys.
    
     - The fix is to disable HFile positional read for all HFile scanner based
       key lookups.
    
    (cherry picked from commit 383d5edc169b79c4022f81a1580ab1bc2afebc30)
---
 .../apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java   | 8 ++++----
 .../main/java/org/apache/hudi/io/storage/HoodieHFileReader.java   | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git 
a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java
 
b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java
index d4a77b0..3700d01 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/common/bootstrap/index/HFileBootstrapIndex.java
@@ -291,13 +291,13 @@ public class HFileBootstrapIndex extends BootstrapIndex {
 
     @Override
     public List<String> getIndexedPartitionPaths() {
-      HFileScanner scanner = partitionIndexReader().getScanner(true, true);
+      HFileScanner scanner = partitionIndexReader().getScanner(true, false);
       return getAllKeys(scanner, HFileBootstrapIndex::getPartitionFromKey);
     }
 
     @Override
     public List<HoodieFileGroupId> getIndexedFileGroupIds() {
-      HFileScanner scanner = fileIdIndexReader().getScanner(true, true);
+      HFileScanner scanner = fileIdIndexReader().getScanner(true, false);
       return getAllKeys(scanner, HFileBootstrapIndex::getFileGroupFromKey);
     }
 
@@ -319,7 +319,7 @@ public class HFileBootstrapIndex extends BootstrapIndex {
     @Override
     public List<BootstrapFileMapping> getSourceFileMappingForPartition(String 
partition) {
       try {
-        HFileScanner scanner = partitionIndexReader().getScanner(true, true);
+        HFileScanner scanner = partitionIndexReader().getScanner(true, false);
         KeyValue keyValue = new 
KeyValue(Bytes.toBytes(getPartitionKey(partition)), new byte[0], new byte[0],
             HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put, new byte[0]);
         if (scanner.seekTo(keyValue) == 0) {
@@ -352,7 +352,7 @@ public class HFileBootstrapIndex extends BootstrapIndex {
       List<HoodieFileGroupId> fileGroupIds = new ArrayList<>(ids);
       Collections.sort(fileGroupIds);
       try {
-        HFileScanner scanner = fileIdIndexReader().getScanner(true, true);
+        HFileScanner scanner = fileIdIndexReader().getScanner(true, false);
         for (HoodieFileGroupId fileGroupId : fileGroupIds) {
           KeyValue keyValue = new 
KeyValue(Bytes.toBytes(getFileGroupKey(fileGroupId)), new byte[0], new byte[0],
               HConstants.LATEST_TIMESTAMP, KeyValue.Type.Put, new byte[0]);
diff --git 
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java 
b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
index 7b80d1a..e3e38ec 100644
--- 
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
+++ 
b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
@@ -246,7 +246,7 @@ public class HoodieHFileReader<R extends IndexedRecord> 
implements HoodieFileRea
 
     synchronized (this) {
       if (keyScanner == null) {
-        keyScanner = reader.getScanner(false, true);
+        keyScanner = reader.getScanner(false, false);
       }
 
       if (keyScanner.seekTo(kv) == 0) {

Reply via email to