This is an automated email from the ASF dual-hosted git repository.

zhangduo pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 1c54e132474 HBASE-29112 Apply KeyOnlyFilter to RowCounter (#6666)
1c54e132474 is described below

commit 1c54e132474e533a37e2d252a95b74295f3c83c7
Author: Junegunn Choi <[email protected]>
AuthorDate: Fri Apr 11 23:41:35 2025 +0900

    HBASE-29112 Apply KeyOnlyFilter to RowCounter (#6666)
    
    Signed-off-by: Duo Zhang <[email protected]>
    (cherry picked from commit 42efe9f768542029980bf2859bc5a32f5890dded)
---
 .../apache/hadoop/hbase/mapreduce/RowCounter.java  | 35 +++++++++++++++++-----
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git 
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
 
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
index 88337ebedb7..66c9f3f9166 100644
--- 
a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
+++ 
b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/RowCounter.java
@@ -27,8 +27,11 @@ import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
 import org.apache.hadoop.hbase.filter.FilterBase;
+import org.apache.hadoop.hbase.filter.FilterList;
 import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
+import org.apache.hadoop.hbase.filter.KeyOnlyFilter;
 import org.apache.hadoop.hbase.filter.MultiRowRangeFilter;
 import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
 import org.apache.hadoop.hbase.util.AbstractHBaseTool;
@@ -311,24 +314,40 @@ public class RowCounter extends AbstractHBaseTool {
 
   /**
    * Sets filter {@link FilterBase} to the {@link Scan} instance. If provided 
rowRangeList contains
-   * more than one element, method sets filter which is instance of {@link 
MultiRowRangeFilter}.
-   * Otherwise, method sets filter which is instance of {@link 
FirstKeyOnlyFilter}. If rowRangeList
-   * contains exactly one element, startRow and stopRow are set to the scan.
+   * more than one element, method sets filter which is instance of {@link 
MultiRowRangeFilter}. If
+   * rowRangeList contains exactly one element, startRow and stopRow are set 
to the scan. Also,
+   * method may apply {@link FirstKeyOnlyFilter} and {@link KeyOnlyFilter} for 
better performance.
    */
   private static void setScanFilter(Scan scan, 
List<MultiRowRangeFilter.RowRange> rowRangeList,
     boolean countDeleteMarkers) {
-    final int size = rowRangeList == null ? 0 : rowRangeList.size();
-    // all cells will be needed if --countDeleteMarkers flag is set, hence, 
skipping filter
-    if (size <= 1 && !countDeleteMarkers) {
-      scan.setFilter(new FirstKeyOnlyFilter());
+    List<Filter> filters = new ArrayList<>();
+
+    // Apply filters for better performance.
+    if (!countDeleteMarkers) {
+      // We only need one cell per row, unless --countDeleteMarkers flag is 
set.
+      filters.add(new FirstKeyOnlyFilter());
+
+      // We're not interested in values. Use KeyOnlyFilter.
+      // NOTE: Logically, KeyOnlyFilter should be okay for 
--countDeleteMarkers, because it should
+      // only empty the values, but currently, having a filter changes the 
behavior of a raw scan.
+      // So we don't use it when --countDeleteMarkers is set. See {@link 
UserScanQueryMatcher#mergeFilterResponse} for more details.
+      filters.add(new KeyOnlyFilter());
     }
+
+    // Depending on the number of ranges, set start/stop row or apply 
MultiRowRangeFilter.
+    final int size = rowRangeList == null ? 0 : rowRangeList.size();
     if (size == 1) {
       MultiRowRangeFilter.RowRange range = rowRangeList.get(0);
       scan.setStartRow(range.getStartRow()); // inclusive
       scan.setStopRow(range.getStopRow()); // exclusive
     } else if (size > 1) {
-      scan.setFilter(new MultiRowRangeFilter(rowRangeList));
+      filters.add(new MultiRowRangeFilter(rowRangeList));
+    }
+
+    if (filters.isEmpty()) {
+      return;
     }
+    scan.setFilter(filters.size() == 1 ? filters.get(0) : new 
FilterList(filters));
   }
 
   @Override

Reply via email to