This is an automated email from the ASF dual-hosted git repository.
zhangduo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new 382681e2d6b HBASE-26967 FilterList with FuzzyRowFilter and
SingleColumnValueFilter evaluated with operator MUST_PASS_ONE doesn't work as
expected(#4820)
382681e2d6b is described below
commit 382681e2d6b6c417145022d7df81390bf42f2773
Author: chaijunjie0101 <[email protected]>
AuthorDate: Sun Jan 29 17:18:18 2023 +0800
HBASE-26967 FilterList with FuzzyRowFilter and SingleColumnValueFilter
evaluated with operator MUST_PASS_ONE doesn't work as expected(#4820)
Close #4820
Co-authored-by: Duo Zhang <[email protected]>
Signed-off-by: Duo Zhang <[email protected]>
---
.../org/apache/hadoop/hbase/filter/FilterBase.java | 2 +-
.../apache/hadoop/hbase/filter/FuzzyRowFilter.java | 43 +++++++++++++---
.../hbase/filter/TestFuzzyRowFilterEndToEnd.java | 57 +++++++++++++++++++++-
3 files changed, 91 insertions(+), 11 deletions(-)
diff --git
a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java
b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java
index 713c4acb270..c80da159b7e 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java
@@ -71,7 +71,7 @@ public abstract class FilterBase extends Filter {
}
/**
- * Fitlers that never filter by modifying the returned List of Cells can
inherit this
+ * Filters that never filter by modifying the returned List of Cells can
inherit this
* implementation that does nothing. {@inheritDoc}
*/
@Override
diff --git
a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java
b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java
index 2feac5527f7..fd5a81d694e 100644
---
a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java
+++
b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FuzzyRowFilter.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hbase.filter;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
@@ -48,18 +49,34 @@ import
org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos.BytesBytesP
* <li>1 - means that this byte in provided row key is NOT fixed, i.e. row
key's byte at this
* position can be different from the one in provided row key</li>
* </ul>
- * Example: Let's assume row key format is userId_actionId_year_month. Length
of userId is fixed and
- * is 4, length of actionId is 2 and year and month are 4 and 2 bytes long
respectively. Let's
- * assume that we need to fetch all users that performed certain action
(encoded as "99") in Jan of
- * any year. Then the pair (row key, fuzzy info) would be the following: row
key = "????_99_????_01"
- * (one can use any value instead of "?") fuzzy info =
- * "\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00" I.e. fuzzy
info tells the matching
- * mask is "????_99_????_01", where at ? can be any value.
+ * Example:
+ * <p>
+ * Let's assume row key format is userId_actionId_year_month. Length of userId
is fixed and is 4,
+ * length of actionId is 2 and year and month are 4 and 2 bytes long
respectively.
+ * <p>
+ * Let's assume that we need to fetch all users that performed certain action
(encoded as "99") in
+ * Jan of any year. Then the pair (row key, fuzzy info) would be the following:
+ *
+ * <pre>
+ * row key = "????_99_????_01" (one can use any value instead of "?")
+ * fuzzy info = "\x01\x01\x01\x01\x00\x00\x00\x00\x01\x01\x01\x01\x00\x00\x00"
+ * </pre>
+ *
+ * I.e. the fuzzy info says the matching mask is "????_99_????_01", where each ?
can be any value.
*/
@InterfaceAudience.Public
public class FuzzyRowFilter extends FilterBase {
private static final boolean UNSAFE_UNALIGNED =
HBasePlatformDependent.unaligned();
private List<Pair<byte[], byte[]>> fuzzyKeysData;
+ // Used to record whether we want to skip the current row.
+ // Usually we would use filterRowKey for this, but in the current scan
implementation, if filterRowKey
+ // returns true, the scanner just skips to the next row instead of calling
getNextCellHint to determine
+ // the actual next row. So we need to implement filterCell and return
SEEK_NEXT_USING_HINT to let
+ // the upper layer call getNextCellHint.
+ // And if we do not also implement filterRow, we will sometimes get incorrect
results when using
+ // FuzzyRowFilter together with other filters; please see the description
of HBASE-26967 for more
+ // details.
+ private boolean filterRow;
private boolean done = false;
/**
@@ -144,6 +161,16 @@ public class FuzzyRowFilter extends FilterBase {
return true;
}
+ @Override
+ public void reset() throws IOException {
+ filterRow = false;
+ }
+
+ @Override
+ public boolean filterRow() throws IOException {
+ return filterRow;
+ }
+
@Override
public ReturnCode filterCell(final Cell c) {
final int startIndex = lastFoundIndex >= 0 ? lastFoundIndex : 0;
@@ -164,7 +191,7 @@ public class FuzzyRowFilter extends FilterBase {
}
// NOT FOUND -> seek next using hint
lastFoundIndex = -1;
-
+ filterRow = true;
return ReturnCode.SEEK_NEXT_USING_HINT;
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java
index 9bc0ed0cb94..872b6c8b541 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFuzzyRowFilterEndToEnd.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hbase.filter;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import java.io.IOException;
import java.nio.ByteBuffer;
@@ -27,6 +28,7 @@ import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.CompareOperator;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.HConstants;
@@ -353,7 +355,6 @@ public class TestFuzzyRowFilterEndToEnd {
assertEquals(expectedSize, found);
}
- @SuppressWarnings("deprecation")
@Test
public void testFilterList() throws Exception {
String cf = "f";
@@ -396,7 +397,6 @@ public class TestFuzzyRowFilterEndToEnd {
}
- @SuppressWarnings("unchecked")
private void runTest(Table hTable, int expectedSize) throws IOException {
// [0, 2, ?, ?, ?, ?, 0, 0, 0, 1]
byte[] fuzzyKey1 = new byte[10];
@@ -454,4 +454,57 @@ public class TestFuzzyRowFilterEndToEnd {
assertEquals(expectedSize, results.size());
}
+
+ @Test
+ public void testHBASE26967() throws IOException {
+ byte[] row1 = Bytes.toBytes("1");
+ byte[] row2 = Bytes.toBytes("2");
+ String cf1 = "f1";
+ String cf2 = "f2";
+ String cq1 = "col1";
+ String cq2 = "col2";
+
+ Table ht =
+ TEST_UTIL.createTable(TableName.valueOf(name.getMethodName()), new
String[] { cf1, cf2 });
+
+ // Put data
+ List<Put> puts = Lists.newArrayList();
+ puts.add(new Put(row1).addColumn(Bytes.toBytes(cf1), Bytes.toBytes(cq1),
Bytes.toBytes("a1")));
+ puts.add(new Put(row1).addColumn(Bytes.toBytes(cf2), Bytes.toBytes(cq2),
Bytes.toBytes("a2")));
+ puts.add(new Put(row2).addColumn(Bytes.toBytes(cf1), Bytes.toBytes(cq1),
Bytes.toBytes("b1")));
+ puts.add(new Put(row2).addColumn(Bytes.toBytes(cf2), Bytes.toBytes(cq2),
Bytes.toBytes("b2")));
+ ht.put(puts);
+
+ TEST_UTIL.flush();
+
+ // FuzzyRowFilter
+ List<Pair<byte[], byte[]>> data = Lists.newArrayList();
+ byte[] fuzzyKey = Bytes.toBytes("1");
+ byte[] mask = new byte[] { 0 };
+ data.add(new Pair<>(fuzzyKey, mask));
+ FuzzyRowFilter fuzzyRowFilter = new FuzzyRowFilter(data);
+
+ // SingleColumnValueFilter
+ Filter singleColumnValueFilter = new
SingleColumnValueFilter(Bytes.toBytes(cf2),
+ Bytes.toBytes(cq2), CompareOperator.EQUAL, Bytes.toBytes("x"));
+
+ // FilterList
+ FilterList filterList = new FilterList(Operator.MUST_PASS_ONE);
+ filterList.addFilter(Lists.newArrayList(fuzzyRowFilter,
singleColumnValueFilter));
+
+ Scan scan = new Scan();
+ scan.setFilter(filterList);
+
+ ResultScanner scanner = ht.getScanner(scan);
+ Result rs = scanner.next();
+ assertEquals(0, Bytes.compareTo(row1, rs.getRow()));
+
+ // Row 1 should contain two cells: (1,f1,col1,a1) and (1,f2,col2,a2)
+ assertEquals(2, rs.listCells().size());
+
+ // There is only one matching row, the one whose rowKey=1
+ assertNull(scanner.next());
+
+ TEST_UTIL.deleteTable(TableName.valueOf(name.getMethodName()));
+ }
}