This is an automated email from the ASF dual-hosted git repository.
mengtao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 73b0be3c96 [HUDI-4192] HoodieHFileReader scan top cells after bottom
cells throw NullPointerException (#5755)
73b0be3c96 is described below
commit 73b0be3c962112efe541ae04fe0ea6f298558f17
Author: marchpure <[email protected]>
AuthorDate: Mon Jun 6 12:07:26 2022 +0800
[HUDI-4192] HoodieHFileReader scan top cells after bottom cells throw
NullPointerException (#5755)
SeekTo top cells avoid NullPointerException
---
.../io/storage/TestHoodieHFileReaderWriter.java | 32 ++++++++++++++++++++++
.../apache/hudi/io/storage/HoodieHFileReader.java | 6 ++++
2 files changed, 38 insertions(+)
diff --git
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
index da6f717258..baede154c9 100644
---
a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
+++
b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
@@ -294,6 +294,38 @@ public class TestHoodieHFileReaderWriter extends
TestHoodieReaderWriterBase {
StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator,
Spliterator.ORDERED), false)
.collect(Collectors.toList());
assertEquals(Collections.emptyList(), recordsByPrefix);
+
+ // filter for "key50" and "key1" : entries from key50 and 'key10 to key19'
should be matched.
+ List<GenericRecord> expectedKey50and1s = allRecords.stream().filter(entry
-> (entry.get("_row_key").toString()).contains("key1")
+ ||
(entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList());
+ iterator =
+ hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50",
"key1"), avroSchema);
+ recordsByPrefix =
+ StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator,
Spliterator.ORDERED), false)
+ .collect(Collectors.toList());
+ assertEquals(expectedKey50and1s, recordsByPrefix);
+
+ // filter for "key50" and "key0" : entries from key50 and 'key00 to key09'
should be matched.
+ List<GenericRecord> expectedKey50and0s = allRecords.stream().filter(entry
-> (entry.get("_row_key").toString()).contains("key0")
+ ||
(entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList());
+ iterator =
+ hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50",
"key0"), avroSchema);
+ recordsByPrefix =
+ StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator,
Spliterator.ORDERED), false)
+ .collect(Collectors.toList());
+ assertEquals(expectedKey50and0s, recordsByPrefix);
+
+ // filter for "key1" and "key0" : entries from 'key10 to key19' and 'key00
to key09' should be matched.
+ List<GenericRecord> expectedKey1sand0s = expectedKey1s;
+ expectedKey1sand0s.addAll(allRecords.stream()
+ .filter(entry -> (entry.get("_row_key").toString()).contains("key0"))
+ .collect(Collectors.toList()));
+ iterator =
+ hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key1",
"key0"), avroSchema);
+ recordsByPrefix =
+ StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator,
Spliterator.ORDERED), false)
+ .collect(Collectors.toList());
+ assertEquals(expectedKey1sand0s, recordsByPrefix);
}
@ParameterizedTest
diff --git
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
index 899c2475da..0bf31d2a25 100644
---
a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
+++
b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
@@ -258,6 +258,12 @@ public class HoodieHFileReader<R extends IndexedRecord>
implements HoodieFileRea
if (!scanner.next()) {
return Collections.emptyIterator();
}
+ } else if (val == -1) {
+ // If the scanner is already at the top of the hfile, avoid triggering seekTo again.
+ Option<Cell> headerCell =
Option.fromJavaOptional(scanner.getReader().getFirstKey());
+ if (headerCell.isPresent() &&
!headerCell.get().equals(scanner.getCell())) {
+ scanner.seekTo();
+ }
}
class KeyPrefixIterator implements Iterator<GenericRecord> {