nsivabalan commented on code in PR #13179:
URL: https://github.com/apache/hudi/pull/13179#discussion_r2051846958
##########
hudi-io/src/test/java/org/apache/hudi/io/hfile/TestHFileReader.java:
##########
@@ -130,14 +139,122 @@ static Stream<Arguments>
testArgsReadHFilePointAndPrefixLookup() {
new KeyLookUpInfo("hudi-key-000019999b", SEEK_TO_EOF, "", "")
)
),
+ // This HFile has fake first keys, i.e., a key that does not exist in
the data block,
+ // stored in the entries in the Meta Index Block
+ Arguments.of(
+ "/hfile/hudi_1_0_hbase_2_4_13_16KB_GZ_20000_fake_first_key.hfile",
+ 20000,
+ KEY_CREATOR_WITH_SUFFIX,
+ Arrays.asList(
+ // before first key
+ new KeyLookUpInfo("", SEEK_TO_BEFORE_FILE_FIRST_KEY,
+ "hudi-key-000000000-abcdefghij", "hudi-value-000000000"),
+ new KeyLookUpInfo("as", SEEK_TO_BEFORE_FILE_FIRST_KEY,
+ "hudi-key-000000000-abcdefghij", "hudi-value-000000000"),
+ // backward seekTo before first key is allowed and safe
+ new KeyLookUpInfo("aa", SEEK_TO_BEFORE_FILE_FIRST_KEY,
+ "hudi-key-000000000-abcdefghij", "hudi-value-000000000"),
+ new KeyLookUpInfo("hudi-key-0000000",
SEEK_TO_BEFORE_FILE_FIRST_KEY,
+ "hudi-key-000000000-abcdefghij", "hudi-value-000000000"),
+ // first key
+ new KeyLookUpInfo("hudi-key-000000000-abcdefghij",
SEEK_TO_FOUND,
+ "hudi-key-000000000-abcdefghij", "hudi-value-000000000"),
+ // key in the block 0
+ new KeyLookUpInfo("hudi-key-000000099-abcdefghij",
SEEK_TO_FOUND,
+ "hudi-key-000000099-abcdefghij", "hudi-value-000000099"),
+ // backward seek not supported in a block
+ new KeyLookUpInfo("hudi-key-000000098",
SEEK_TO_THROW_EXCEPTION, "", ""),
+ // prefix lookup, the pointer should not move
+ new KeyLookUpInfo("hudi-key-00000010", SEEK_TO_IN_RANGE,
+ "hudi-key-000000099-abcdefghij", "hudi-value-000000099"),
+ // non-exact lookup, the pointer should move
+ new KeyLookUpInfo("hudi-key-000000100a", SEEK_TO_IN_RANGE,
+ "hudi-key-000000100-abcdefghij", "hudi-value-000000100"),
+ new KeyLookUpInfo("hudi-key-000000100b", SEEK_TO_IN_RANGE,
+ "hudi-key-000000100-abcdefghij", "hudi-value-000000100"),
+ // prefix lookup with a jump, the pointer should not go beyond
the lookup key
+ new KeyLookUpInfo("hudi-key-00000030", SEEK_TO_IN_RANGE,
+ "hudi-key-000000299-abcdefghij", "hudi-value-000000299"),
+ new KeyLookUpInfo("hudi-key-000000300", SEEK_TO_IN_RANGE,
+ "hudi-key-000000299-abcdefghij", "hudi-value-000000299"),
+ new KeyLookUpInfo("hudi-key-000000300a", SEEK_TO_IN_RANGE,
+ "hudi-key-000000300-abcdefghij", "hudi-value-000000300"),
+ // last key of the block 1
+ new KeyLookUpInfo("hudi-key-000000469-abcdefghij",
SEEK_TO_FOUND,
+ "hudi-key-000000469-abcdefghij", "hudi-value-000000469"),
+ new KeyLookUpInfo("hudi-key-000000469a", SEEK_TO_IN_RANGE,
+ "hudi-key-000000469-abcdefghij", "hudi-value-000000469"),
+ new KeyLookUpInfo("hudi-key-000000469b", SEEK_TO_IN_RANGE,
+ "hudi-key-000000469-abcdefghij", "hudi-value-000000469"),
+ // Block 2:
+ // fake first key in block index: hudi-key-00000047
+ // actual first key: hudi-key-000000470-abcdefghij
+ // Lookup key is smaller than actual first key
+ new KeyLookUpInfo("hudi-key-00000047",
SEEK_TO_BEFORE_BLOCK_FIRST_KEY,
+ "hudi-key-000000470-abcdefghij", "hudi-value-000000470"),
+ new KeyLookUpInfo("hudi-key-000000470",
SEEK_TO_BEFORE_BLOCK_FIRST_KEY,
+ "hudi-key-000000470-abcdefghij", "hudi-value-000000470"),
+ // Lookup key is greater than actual first key
+ new KeyLookUpInfo("hudi-key-000000473", SEEK_TO_IN_RANGE,
+ "hudi-key-000000472-abcdefghij", "hudi-value-000000472"),
+ // Block 29:
+ // fake first key in block index: hudi-key-000006815
+ // actual first key: hudi-key-000006815-abcdefghij
+ new KeyLookUpInfo("hudi-key-000006815-aaa",
SEEK_TO_BEFORE_BLOCK_FIRST_KEY,
+ "hudi-key-000006815-abcdefghij", "hudi-value-000006815"),
+ new KeyLookUpInfo("hudi-key-000006815-aaa",
SEEK_TO_BEFORE_BLOCK_FIRST_KEY,
+ "hudi-key-000006815-abcdefghij", "hudi-value-000006815"),
+ // Backward seekTo within the range [fake first key, actual
first key) is OK
+ new KeyLookUpInfo("hudi-key-000006815-aa",
SEEK_TO_BEFORE_BLOCK_FIRST_KEY,
+ "hudi-key-000006815-abcdefghij", "hudi-value-000006815"),
+ new KeyLookUpInfo("hudi-key-000006815",
SEEK_TO_BEFORE_BLOCK_FIRST_KEY,
+ "hudi-key-000006815-abcdefghij", "hudi-value-000006815"),
+ // Backward seekTo before the fake first key is not supported
+ new KeyLookUpInfo("hudi-key-000006814",
SEEK_TO_THROW_EXCEPTION, "", ""),
+ new KeyLookUpInfo("hudi-key-000006815-ab",
SEEK_TO_BEFORE_BLOCK_FIRST_KEY,
+ "hudi-key-000006815-abcdefghij", "hudi-value-000006815"),
+ new KeyLookUpInfo("hudi-key-000006815-ac", SEEK_TO_IN_RANGE,
+ "hudi-key-000006815-abcdefghij", "hudi-value-000006815"),
+ new KeyLookUpInfo("hudi-key-000006816", SEEK_TO_IN_RANGE,
Review Comment:
Can we also add one exact-match lookup here,
i.e., for the key hudi-key-000006815-abcdefghij?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
users@infra.apache.org