Copilot commented on code in PR #8424: URL: https://github.com/apache/hbase/pull/8424#discussion_r3486274002
########## hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterWoUnsafe.java: ########## @@ -0,0 +1,346 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.filter; + +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.mockito.Mockito.mockStatic; + +import java.lang.reflect.Field; +import java.util.Arrays; +import java.util.Collections; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValueUtil; +import org.apache.hadoop.hbase.testclassification.FilterTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.apache.hadoop.hbase.unsafe.HBasePlatformDependent; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.Pair; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.mockito.MockedStatic; + +import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.BytesBytesPair; + +/** + * Exercises {@link FuzzyRowFilter} on the "no-unsafe" code path + * ({@code HBasePlatformDependent.unaligned() == false}) end-to-end, i.e. through the constructor, + * {@link FuzzyRowFilter#filterCell} and {@link FuzzyRowFilter#getNextCellHint}. On this path the + * mask uses the {0 (fixed), 1 (non-fixed)} encoding, which differs from the unsafe {-1, 2} encoding + * that the rest of the scan machinery assumes. + */ +@Tag(FilterTests.TAG) +@Tag(SmallTests.TAG) +public class TestFuzzyRowFilterWoUnsafe { + + @BeforeAll + public static void disableUnsafe() throws Exception { + // Force the no-unsafe path: make HBasePlatformDependent.unaligned() return false and trigger + // FuzzyRowFilter's static initializer while the mock is active, so its private static final + // UNSAFE_UNALIGNED is captured as false for this JVM fork. + try (MockedStatic<HBasePlatformDependent> mocked = mockStatic(HBasePlatformDependent.class)) { + mocked.when(HBasePlatformDependent::isUnsafeAvailable).thenReturn(false); + mocked.when(HBasePlatformDependent::unaligned).thenReturn(false); + Field field = FuzzyRowFilter.class.getDeclaredField("UNSAFE_UNALIGNED"); + field.setAccessible(true); + assertFalse(field.getBoolean(null), "expected FuzzyRowFilter to use the no-unsafe path"); + } + } + + /** + * A row that matches the fuzzy rule only thanks to a wildcard position must be INCLUDEd. On the + * broken no-unsafe path the mask was shifted to all-zeroes (all positions treated as fixed), so + * the wildcard row was wrongly rejected. + */ + @Test + public void testForwardMatchesWildcardRow() { + FuzzyRowFilter filter = new FuzzyRowFilter( + Collections.singletonList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + + KeyValue match = KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }); + assertEquals(Filter.ReturnCode.INCLUDE, filter.filterCell(match)); + } + + /** + * For a non-matching row the next-cell hint must be the smallest row that can satisfy the rule. + * With fixed positions 5 and 5 and a wildcard in the middle, the smallest row at or after {3,0,0} + * is {5,0,5}. On the broken no-unsafe path the wildcard key byte was never cleared and the mask + * was corrupted, producing a wrong hint. + */ + @Test + public void testForwardHintSkipsToSmallestMatchingRow() { + FuzzyRowFilter filter = new FuzzyRowFilter( + Collections.singletonList(new Pair<>(new byte[] { 5, 100, 5 }, new byte[] { 0, 1, 0 }))); + + KeyValue current = KeyValueUtil.createFirstOnRow(new byte[] { 3, 0, 0 }); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(current)); + + Cell hint = filter.getNextCellHint(current); + assertRow(new byte[] { 5, 0, 5 }, hint); + } + + /** + * No-unsafe analogue of {@code TestFuzzyRowFilter#testReverseFilterCellSkipsSameRowHint}. This is + * the most subtle composition point this change touches: the reverse next-cell hint is computed + * from the no-unsafe mask (updateWith -> preprocessMaskForHinting({0,1}->{-1,0}) -> + * getNextForFuzzyRule(reverse)) and must still play with the HBASE-30226 same-row short-circuit. + * A non-matching row seeks back to "abb"; revisiting "abb" must be skipped with NEXT_ROW instead + * of recreating the same hint. + */ + @Test + public void testReverseHintSkipsSameRow() { + FuzzyRowFilter filter = new FuzzyRowFilter( + Collections.singletonList(new Pair<>(Bytes.toBytes("aaa"), new byte[] { 0, 1, 0 }))); + filter.setReversed(true); + + KeyValue abc = KeyValueUtil.createFirstOnRow(Bytes.toBytes("abc")); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(abc)); + assertRow(Bytes.toBytes("abb"), filter.getNextCellHint(abc)); + + KeyValue abb = KeyValueUtil.createFirstOnRow(Bytes.toBytes("abb")); + assertEquals(Filter.ReturnCode.NEXT_ROW, filter.filterCell(abb)); + } + + /** + * With multiple fuzzy keys the RowTracker priority queue holds one (separately converted) hint + * mask per key and must return the smallest matching row across all of them. Here key1 {5,*,5} + * hints {5,0,5} and key2 {4,9,9} hints {4,9,9}; the smaller {4,9,9} must win. + */ + @Test + public void testForwardHintWithMultipleKeysReturnsSmallest() { + FuzzyRowFilter filter = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 5, 100, 5 }, new byte[] { 0, 1, 0 }), + new Pair<>(new byte[] { 4, 9, 9 }, new byte[] { 0, 0, 0 }))); + + KeyValue current = KeyValueUtil.createFirstOnRow(new byte[] { 3, 0, 0 }); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, filter.filterCell(current)); + assertRow(new byte[] { 4, 9, 9 }, filter.getNextCellHint(current)); + } + + /** + * A FuzzyRowFilter serialized by an unsafe peer puts the mask on the wire in its preprocessed {-1 + * (fixed), 2 (non-fixed)} form. A no-unsafe server must still interpret it correctly: this is + * exactly the {key, mask} pair {@link FuzzyRowFilter#parseFrom} hands to the constructor for such + * a filter. Before the fix the no-unsafe constructor left {-1, 2} untouched, so + * {@link FuzzyRowFilter#satisfiesNoUnsafe} (which keys off {0, 1}) treated every position as a + * wildcard and stopped enforcing the fixed bytes, wrongly INCLUDEing non-matching rows. + */ + @Test + public void testUnsafeEncodedMaskFromPeerEnforcesFixedPositions() { + // {-1, 2, -1} == fixed, non-fixed, fixed -> equivalent no-unsafe mask {0, 1, 0}. + FuzzyRowFilter match = new FuzzyRowFilter( + Collections.singletonList(new Pair<>(new byte[] { 1, 0, 3 }, new byte[] { -1, 2, -1 }))); + assertEquals(Filter.ReturnCode.INCLUDE, + match.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + + // A row that differs at a FIXED position (pos 2: 9 != 3) must not be INCLUDEd. + FuzzyRowFilter reject = new FuzzyRowFilter( + Collections.singletonList(new Pair<>(new byte[] { 1, 0, 3 }, new byte[] { -1, 2, -1 }))); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, + reject.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 9 }))); + } + + /** + * Faithful wire-level companion to {@link #testUnsafeEncodedMaskFromPeerEnforcesFixedPositions}: + * build the exact protobuf bytes an unsafe peer emits (mask in the preprocessed {-1, 2} form, + * with the {@code is_mask_v2} flag set, as HBASE-26537 requires) and run them through + * {@link FuzzyRowFilter#parseFrom}. A no-unsafe server must normalize the mask back to {0, 1} on + * deserialization and still enforce the fixed positions. + */ + @Test + public void testParseFromUnsafeEncodedFilterEnforcesFixedPositions() throws Exception { + byte[] wire = FilterProtos.FuzzyRowFilter.newBuilder().setIsMaskV2(true) + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 2, -1 }))) + .build().toByteArray(); + + assertEquals(Filter.ReturnCode.INCLUDE, FuzzyRowFilter.parseFrom(wire) + .filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + // A row that differs at a FIXED position (pos 2: 9 != 3) must not be INCLUDEd. + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, FuzzyRowFilter.parseFrom(wire) + .filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 9 }))); + } + + /** + * branch-2 keeps the legacy HBASE-15676 v1 wire encoding for backwards compatibility: a peer that + * predates the {@code is_mask_v2} flag puts the mask on the wire in its v1 internal form + * {-1 (fixed), 0 (non-fixed)} with no flag, so {@link FuzzyRowFilter#parseFrom} reads it as v1 + * (processed wildcard = 0). A no-unsafe server must still normalize it back to {0, 1} and enforce + * the fixed positions, exactly like the v2 case above. + */ + @Test + public void testParseFromLegacyV1EncodedFilterEnforcesFixedPositions() throws Exception { + // No is_mask_v2 flag -> v1; mask {-1, 0, -1} == fixed, non-fixed, fixed. + byte[] wire = FilterProtos.FuzzyRowFilter.newBuilder() + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 0, -1 }))) + .build().toByteArray(); + + assertEquals(Filter.ReturnCode.INCLUDE, FuzzyRowFilter.parseFrom(wire) + .filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + // A row that differs at a FIXED position (pos 2: 9 != 3) must not be INCLUDEd. + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, FuzzyRowFilter.parseFrom(wire) + .filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 9 }))); + } + + /** + * Legacy public form is the other half of the {@code 0} ambiguity: a pre-HBASE-26537 no-unsafe + * peer serialized the mask in its public {0 (fixed), 1 (non-fixed)} form with no is_mask_v2 flag, + * so an all-fixed rule went on the wire as all-zeros {0, 0, 0}. Since that carries no -1 fixed + * marker it must be read as public all-fixed (NOT as a legacy unsafe all-wildcard mask, which is + * also all-zeros), so a row differing at a fixed position is rejected rather than wrongly + * INCLUDEd. This pins the chosen tradeoff for the inherently ambiguous all-zeros wire. + */ + @Test + public void testParseFromLegacyNoUnsafeAllFixedMaskEnforcesFixedPositions() throws Exception { + // No is_mask_v2 flag, public all-fixed mask {0, 0, 0} from a legacy no-unsafe peer. + byte[] wire = FilterProtos.FuzzyRowFilter.newBuilder() + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 2, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { 0, 0, 0 }))) + .build().toByteArray(); + + // Exact-match row is INCLUDEd ... + assertEquals(Filter.ReturnCode.INCLUDE, FuzzyRowFilter.parseFrom(wire) + .filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 2, 3 }))); + // ... but a row differing at a fixed position must NOT be (all-zeros is all-fixed, not + // all-wildcard). + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, FuzzyRowFilter.parseFrom(wire) + .filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + } + + /** + * A v2 unsafe peer encodes an all-wildcard rule as the internal all-non-fixed mask {2, 2, 2} (with + * is_mask_v2 set). It carries no -1 fixed marker but is still internal form, since the public {0, + * 1} form never uses 2. A no-unsafe server must normalize it back to the public all-wildcard {1, + * 1, 1}, so getFuzzyKeys / toByteArray / equals never leak the internal {2} encoding (scan + * matching is unaffected either way, but the wire/equality contract is public-form). Review Comment: This Javadoc line exceeds the repo's Checkstyle 100-character LineLength limit and will fail the build. Please wrap it onto multiple lines. ########## hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java: ########## @@ -420,6 +429,128 @@ public void testReverseFilterListSkipsSameRowFuzzyHint() throws IOException { } } + /** + * Serializing a filter must not leak the internal mask encoding. On the unsafe path the stored + * mask is {-1 (fixed), 0 (non-fixed)}; {@code toByteArray} must emit the public constructor {0, + * 1} form so the round-tripped filter behaves identically (otherwise the leaked {-1, 0} is + * reparsed as an all-fixed mask). Processing a cell first must not change this. + */ + @Test + public void testSerializationAfterFilterCellPreservesBehavior() throws Exception { + FuzzyRowFilter original = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + // Process a cell first to prove serialization is unaffected by scanning. + original.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 })); + + FuzzyRowFilter parsed = FuzzyRowFilter.parseFrom(original.toByteArray()); + // A row matching only via the wildcard position must still be INCLUDED after the round-trip. + assertEquals(Filter.ReturnCode.INCLUDE, + parsed.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + } + + /** + * Two filters built from the same rule (distinct array instances) must be equal and hash equally, + * i.e. {@code equals}/{@code hashCode} must be content-based, not identity-based, and consistent + * with each other and with the serialized ({0, 1}) form. This must also hold after one of them + * has processed a cell. + */ + @Test + public void testEqualsConsistentAfterFilterCell() { + FuzzyRowFilter fresh = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + FuzzyRowFilter scanned = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + scanned.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 })); + + assertEquals(fresh, scanned); + assertEquals(fresh.hashCode(), scanned.hashCode()); + } + + /** + * Unsafe-path coverage for branch-2's is_mask_v2 wire plumbing (only exercised on the no-unsafe + * path otherwise): the flag must round-trip (v1-parsed stays v1, v2/client stays v2) and + * {@code toByteArray} must always emit the public {0, 1} mask, never the internal {-1, 0/2}. + */ + @Test + public void testIsMaskV2WireRoundTrip() throws Exception { + // v1 in (no flag) -> stays v1 on the wire, mask normalized to public {0, 1, 0}. + byte[] v1Wire = FilterProtos.FuzzyRowFilter.newBuilder() + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 0, -1 }))) + .build().toByteArray(); + FilterProtos.FuzzyRowFilter v1Out = + FilterProtos.FuzzyRowFilter.parseFrom(FuzzyRowFilter.parseFrom(v1Wire).toByteArray()); + assertFalse(v1Out.getIsMaskV2()); + assertArrayEquals(new byte[] { 0, 1, 0 }, v1Out.getFuzzyKeysData(0).getSecond().toByteArray()); + + // v2 in -> stays v2, mask normalized to public {0, 1, 0}. + byte[] v2Wire = FilterProtos.FuzzyRowFilter.newBuilder().setIsMaskV2(true) + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 2, -1 }))) + .build().toByteArray(); + FilterProtos.FuzzyRowFilter v2Out = + FilterProtos.FuzzyRowFilter.parseFrom(FuzzyRowFilter.parseFrom(v2Wire).toByteArray()); + assertTrue(v2Out.getIsMaskV2()); + assertArrayEquals(new byte[] { 0, 1, 0 }, v2Out.getFuzzyKeysData(0).getSecond().toByteArray()); + + // A client-built filter is always v2 on the wire. + FuzzyRowFilter client = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + assertTrue(FilterProtos.FuzzyRowFilter.parseFrom(client.toByteArray()).getIsMaskV2()); + } + + /** + * getFuzzyKeys must expose the public {0, 1} form, never the internal stored encoding (on the + * unsafe path the stored mask is {-1, 0}). This is the form REST ScannerModel feeds back into the + * constructor, so a leak would corrupt a round-tripped REST filter. + */ + @Test + public void testGetFuzzyKeysReturnsPublicForm() { + FuzzyRowFilter filter = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + assertArrayEquals(new byte[] { 0, 1, 0 }, filter.getFuzzyKeys().get(0).getSecond()); + } + + /** + * Unsafe-path companion to + * TestFuzzyRowFilterWoUnsafe#testParseFromLegacyV1EncodedFilterEnforcesFixedPositions: a legacy + * pre-HBASE-26537 v1 wire (no is_mask_v2 flag, internal {-1, 0} form) must still enforce its fixed + * positions after parseFrom. Review Comment: This Javadoc line exceeds the repo's Checkstyle 100-character LineLength limit and will fail the build. Please wrap it onto multiple lines. ########## hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java: ########## @@ -420,6 +429,128 @@ public void testReverseFilterListSkipsSameRowFuzzyHint() throws IOException { } } + /** + * Serializing a filter must not leak the internal mask encoding. On the unsafe path the stored + * mask is {-1 (fixed), 0 (non-fixed)}; {@code toByteArray} must emit the public constructor {0, + * 1} form so the round-tripped filter behaves identically (otherwise the leaked {-1, 0} is + * reparsed as an all-fixed mask). Processing a cell first must not change this. + */ + @Test + public void testSerializationAfterFilterCellPreservesBehavior() throws Exception { + FuzzyRowFilter original = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + // Process a cell first to prove serialization is unaffected by scanning. + original.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 })); + + FuzzyRowFilter parsed = FuzzyRowFilter.parseFrom(original.toByteArray()); + // A row matching only via the wildcard position must still be INCLUDED after the round-trip. + assertEquals(Filter.ReturnCode.INCLUDE, + parsed.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + } + + /** + * Two filters built from the same rule (distinct array instances) must be equal and hash equally, + * i.e. {@code equals}/{@code hashCode} must be content-based, not identity-based, and consistent + * with each other and with the serialized ({0, 1}) form. This must also hold after one of them + * has processed a cell. + */ + @Test + public void testEqualsConsistentAfterFilterCell() { + FuzzyRowFilter fresh = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + FuzzyRowFilter scanned = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + scanned.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 })); + + assertEquals(fresh, scanned); + assertEquals(fresh.hashCode(), scanned.hashCode()); + } + + /** + * Unsafe-path coverage for branch-2's is_mask_v2 wire plumbing (only exercised on the no-unsafe + * path otherwise): the flag must round-trip (v1-parsed stays v1, v2/client stays v2) and + * {@code toByteArray} must always emit the public {0, 1} mask, never the internal {-1, 0/2}. + */ + @Test + public void testIsMaskV2WireRoundTrip() throws Exception { + // v1 in (no flag) -> stays v1 on the wire, mask normalized to public {0, 1, 0}. + byte[] v1Wire = FilterProtos.FuzzyRowFilter.newBuilder() + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 0, -1 }))) + .build().toByteArray(); + FilterProtos.FuzzyRowFilter v1Out = + FilterProtos.FuzzyRowFilter.parseFrom(FuzzyRowFilter.parseFrom(v1Wire).toByteArray()); + assertFalse(v1Out.getIsMaskV2()); + assertArrayEquals(new byte[] { 0, 1, 0 }, v1Out.getFuzzyKeysData(0).getSecond().toByteArray()); + + // v2 in -> stays v2, mask normalized to public {0, 1, 0}. + byte[] v2Wire = FilterProtos.FuzzyRowFilter.newBuilder().setIsMaskV2(true) + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 2, -1 }))) + .build().toByteArray(); + FilterProtos.FuzzyRowFilter v2Out = + FilterProtos.FuzzyRowFilter.parseFrom(FuzzyRowFilter.parseFrom(v2Wire).toByteArray()); + assertTrue(v2Out.getIsMaskV2()); + assertArrayEquals(new byte[] { 0, 1, 0 }, v2Out.getFuzzyKeysData(0).getSecond().toByteArray()); + + // A client-built filter is always v2 on the wire. + FuzzyRowFilter client = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + assertTrue(FilterProtos.FuzzyRowFilter.parseFrom(client.toByteArray()).getIsMaskV2()); + } + + /** + * getFuzzyKeys must expose the public {0, 1} form, never the internal stored encoding (on the + * unsafe path the stored mask is {-1, 0}). This is the form REST ScannerModel feeds back into the + * constructor, so a leak would corrupt a round-tripped REST filter. + */ + @Test + public void testGetFuzzyKeysReturnsPublicForm() { + FuzzyRowFilter filter = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + assertArrayEquals(new byte[] { 0, 1, 0 }, filter.getFuzzyKeys().get(0).getSecond()); + } + + /** + * Unsafe-path companion to + * TestFuzzyRowFilterWoUnsafe#testParseFromLegacyV1EncodedFilterEnforcesFixedPositions: a legacy + * pre-HBASE-26537 v1 wire (no is_mask_v2 flag, internal {-1, 0} form) must still enforce its fixed + * positions after parseFrom. + */ + @Test + public void testParseFromLegacyV1EncodedFilterEnforcesFixedPositions() throws Exception { + byte[] wire = FilterProtos.FuzzyRowFilter.newBuilder() + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 0, -1 }))) + .build().toByteArray(); + assertEquals(Filter.ReturnCode.INCLUDE, + FuzzyRowFilter.parseFrom(wire).filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, + FuzzyRowFilter.parseFrom(wire).filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 9 }))); + } + + /** + * Inherent twin of + * TestFuzzyRowFilterWoUnsafe#testParseFromLegacyNoUnsafeAllFixedMaskEnforcesFixedPositions: a + * no-flag all-zeros wire {0, 0, 0} is the legacy unsafe v1 all-wildcard encoding (HBASE-15676), so + * on the unsafe path it matches every row -- the deliberate opposite of the no-unsafe all-fixed + * reading. Pins that cross-version asymmetry on the unsafe side; guarded to the unsafe path. + */ + @Test + public void testParseFromLegacyV1AllZerosMaskIsAllWildcardOnUnsafe() throws Exception { + assumeTrue(HBasePlatformDependent.unaligned()); + byte[] wire = FilterProtos.FuzzyRowFilter.newBuilder() + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 2, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { 0, 0, 0 }))) + .build().toByteArray(); + assertEquals(Filter.ReturnCode.INCLUDE, + FuzzyRowFilter.parseFrom(wire).filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 9, 9, 9 }))); Review Comment: This assertion exceeds the repo's Checkstyle 100-character LineLength limit and will fail the build. Wrap the chained calls across lines. ########## hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java: ########## @@ -420,6 +429,128 @@ public void testReverseFilterListSkipsSameRowFuzzyHint() throws IOException { } } + /** + * Serializing a filter must not leak the internal mask encoding. On the unsafe path the stored + * mask is {-1 (fixed), 0 (non-fixed)}; {@code toByteArray} must emit the public constructor {0, + * 1} form so the round-tripped filter behaves identically (otherwise the leaked {-1, 0} is + * reparsed as an all-fixed mask). Processing a cell first must not change this. + */ + @Test + public void testSerializationAfterFilterCellPreservesBehavior() throws Exception { + FuzzyRowFilter original = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + // Process a cell first to prove serialization is unaffected by scanning. + original.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 })); + + FuzzyRowFilter parsed = FuzzyRowFilter.parseFrom(original.toByteArray()); + // A row matching only via the wildcard position must still be INCLUDED after the round-trip. + assertEquals(Filter.ReturnCode.INCLUDE, + parsed.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + } + + /** + * Two filters built from the same rule (distinct array instances) must be equal and hash equally, + * i.e. {@code equals}/{@code hashCode} must be content-based, not identity-based, and consistent + * with each other and with the serialized ({0, 1}) form. This must also hold after one of them + * has processed a cell. + */ + @Test + public void testEqualsConsistentAfterFilterCell() { + FuzzyRowFilter fresh = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + FuzzyRowFilter scanned = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + scanned.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 })); + + assertEquals(fresh, scanned); + assertEquals(fresh.hashCode(), scanned.hashCode()); + } + + /** + * Unsafe-path coverage for branch-2's is_mask_v2 wire plumbing (only exercised on the no-unsafe + * path otherwise): the flag must round-trip (v1-parsed stays v1, v2/client stays v2) and + * {@code toByteArray} must always emit the public {0, 1} mask, never the internal {-1, 0/2}. + */ + @Test + public void testIsMaskV2WireRoundTrip() throws Exception { + // v1 in (no flag) -> stays v1 on the wire, mask normalized to public {0, 1, 0}. + byte[] v1Wire = FilterProtos.FuzzyRowFilter.newBuilder() + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 0, -1 }))) + .build().toByteArray(); + FilterProtos.FuzzyRowFilter v1Out = + FilterProtos.FuzzyRowFilter.parseFrom(FuzzyRowFilter.parseFrom(v1Wire).toByteArray()); + assertFalse(v1Out.getIsMaskV2()); + assertArrayEquals(new byte[] { 0, 1, 0 }, v1Out.getFuzzyKeysData(0).getSecond().toByteArray()); + + // v2 in -> stays v2, mask normalized to public {0, 1, 0}. + byte[] v2Wire = FilterProtos.FuzzyRowFilter.newBuilder().setIsMaskV2(true) + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 2, -1 }))) + .build().toByteArray(); + FilterProtos.FuzzyRowFilter v2Out = + FilterProtos.FuzzyRowFilter.parseFrom(FuzzyRowFilter.parseFrom(v2Wire).toByteArray()); + assertTrue(v2Out.getIsMaskV2()); + assertArrayEquals(new byte[] { 0, 1, 0 }, v2Out.getFuzzyKeysData(0).getSecond().toByteArray()); + + // A client-built filter is always v2 on the wire. + FuzzyRowFilter client = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + assertTrue(FilterProtos.FuzzyRowFilter.parseFrom(client.toByteArray()).getIsMaskV2()); + } + + /** + * getFuzzyKeys must expose the public {0, 1} form, never the internal stored encoding (on the + * unsafe path the stored mask is {-1, 0}). This is the form REST ScannerModel feeds back into the + * constructor, so a leak would corrupt a round-tripped REST filter. + */ + @Test + public void testGetFuzzyKeysReturnsPublicForm() { + FuzzyRowFilter filter = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + assertArrayEquals(new byte[] { 0, 1, 0 }, filter.getFuzzyKeys().get(0).getSecond()); + } + + /** + * Unsafe-path companion to + * TestFuzzyRowFilterWoUnsafe#testParseFromLegacyV1EncodedFilterEnforcesFixedPositions: a legacy + * pre-HBASE-26537 v1 wire (no is_mask_v2 flag, internal {-1, 0} form) must still enforce its fixed + * positions after parseFrom. + */ + @Test + public void testParseFromLegacyV1EncodedFilterEnforcesFixedPositions() throws Exception { + byte[] wire = FilterProtos.FuzzyRowFilter.newBuilder() + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 0, -1 }))) + .build().toByteArray(); + assertEquals(Filter.ReturnCode.INCLUDE, + FuzzyRowFilter.parseFrom(wire).filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, + FuzzyRowFilter.parseFrom(wire).filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 9 }))); + } + + /** + * Inherent twin of + * TestFuzzyRowFilterWoUnsafe#testParseFromLegacyNoUnsafeAllFixedMaskEnforcesFixedPositions: a + * no-flag all-zeros wire {0, 0, 0} is the legacy unsafe v1 all-wildcard encoding (HBASE-15676), so + * on the unsafe path it matches every row -- the deliberate opposite of the no-unsafe all-fixed + * reading. Pins that cross-version asymmetry on the unsafe side; guarded to the unsafe path. Review Comment: This Javadoc block contains a line that exceeds the repo's Checkstyle 100-character LineLength limit and will fail the build. Wrap the sentence so each line stays within the limit. ########## hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilter.java: ########## @@ -420,6 +429,128 @@ public void testReverseFilterListSkipsSameRowFuzzyHint() throws IOException { } } + /** + * Serializing a filter must not leak the internal mask encoding. On the unsafe path the stored + * mask is {-1 (fixed), 0 (non-fixed)}; {@code toByteArray} must emit the public constructor {0, + * 1} form so the round-tripped filter behaves identically (otherwise the leaked {-1, 0} is + * reparsed as an all-fixed mask). Processing a cell first must not change this. + */ + @Test + public void testSerializationAfterFilterCellPreservesBehavior() throws Exception { + FuzzyRowFilter original = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + // Process a cell first to prove serialization is unaffected by scanning. + original.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 })); + + FuzzyRowFilter parsed = FuzzyRowFilter.parseFrom(original.toByteArray()); + // A row matching only via the wildcard position must still be INCLUDED after the round-trip. + assertEquals(Filter.ReturnCode.INCLUDE, + parsed.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + } + + /** + * Two filters built from the same rule (distinct array instances) must be equal and hash equally, + * i.e. {@code equals}/{@code hashCode} must be content-based, not identity-based, and consistent + * with each other and with the serialized ({0, 1}) form. This must also hold after one of them + * has processed a cell. + */ + @Test + public void testEqualsConsistentAfterFilterCell() { + FuzzyRowFilter fresh = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + FuzzyRowFilter scanned = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + scanned.filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 50, 3 })); + + assertEquals(fresh, scanned); + assertEquals(fresh.hashCode(), scanned.hashCode()); + } + + /** + * Unsafe-path coverage for branch-2's is_mask_v2 wire plumbing (only exercised on the no-unsafe + * path otherwise): the flag must round-trip (v1-parsed stays v1, v2/client stays v2) and + * {@code toByteArray} must always emit the public {0, 1} mask, never the internal {-1, 0/2}. + */ + @Test + public void testIsMaskV2WireRoundTrip() throws Exception { + // v1 in (no flag) -> stays v1 on the wire, mask normalized to public {0, 1, 0}. + byte[] v1Wire = FilterProtos.FuzzyRowFilter.newBuilder() + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 0, -1 }))) + .build().toByteArray(); + FilterProtos.FuzzyRowFilter v1Out = + FilterProtos.FuzzyRowFilter.parseFrom(FuzzyRowFilter.parseFrom(v1Wire).toByteArray()); + assertFalse(v1Out.getIsMaskV2()); + assertArrayEquals(new byte[] { 0, 1, 0 }, v1Out.getFuzzyKeysData(0).getSecond().toByteArray()); + + // v2 in -> stays v2, mask normalized to public {0, 1, 0}. + byte[] v2Wire = FilterProtos.FuzzyRowFilter.newBuilder().setIsMaskV2(true) + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 2, -1 }))) + .build().toByteArray(); + FilterProtos.FuzzyRowFilter v2Out = + FilterProtos.FuzzyRowFilter.parseFrom(FuzzyRowFilter.parseFrom(v2Wire).toByteArray()); + assertTrue(v2Out.getIsMaskV2()); + assertArrayEquals(new byte[] { 0, 1, 0 }, v2Out.getFuzzyKeysData(0).getSecond().toByteArray()); + + // A client-built filter is always v2 on the wire. + FuzzyRowFilter client = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + assertTrue(FilterProtos.FuzzyRowFilter.parseFrom(client.toByteArray()).getIsMaskV2()); + } + + /** + * getFuzzyKeys must expose the public {0, 1} form, never the internal stored encoding (on the + * unsafe path the stored mask is {-1, 0}). This is the form REST ScannerModel feeds back into the + * constructor, so a leak would corrupt a round-tripped REST filter. + */ + @Test + public void testGetFuzzyKeysReturnsPublicForm() { + FuzzyRowFilter filter = + new FuzzyRowFilter(Arrays.asList(new Pair<>(new byte[] { 1, 2, 3 }, new byte[] { 0, 1, 0 }))); + assertArrayEquals(new byte[] { 0, 1, 0 }, filter.getFuzzyKeys().get(0).getSecond()); + } + + /** + * Unsafe-path companion to + * TestFuzzyRowFilterWoUnsafe#testParseFromLegacyV1EncodedFilterEnforcesFixedPositions: a legacy + * pre-HBASE-26537 v1 wire (no is_mask_v2 flag, internal {-1, 0} form) must still enforce its fixed + * positions after parseFrom. + */ + @Test + public void testParseFromLegacyV1EncodedFilterEnforcesFixedPositions() throws Exception { + byte[] wire = FilterProtos.FuzzyRowFilter.newBuilder() + .addFuzzyKeysData(BytesBytesPair.newBuilder() + .setFirst(UnsafeByteOperations.unsafeWrap(new byte[] { 1, 0, 3 })) + .setSecond(UnsafeByteOperations.unsafeWrap(new byte[] { -1, 0, -1 }))) + .build().toByteArray(); + assertEquals(Filter.ReturnCode.INCLUDE, + FuzzyRowFilter.parseFrom(wire).filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 3 }))); + assertEquals(Filter.ReturnCode.SEEK_NEXT_USING_HINT, + FuzzyRowFilter.parseFrom(wire).filterCell(KeyValueUtil.createFirstOnRow(new byte[] { 1, 99, 9 }))); Review Comment: These assertions exceed the repo's Checkstyle 100-character LineLength limit and will fail the build. Wrap the chained calls across lines (the project already uses this style elsewhere in this file). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
