HBASE-15014 Fix filterCellByStore in WALsplitter is awful for performance

Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/6e2c5d21
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/6e2c5d21
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/6e2c5d21

Branch: refs/heads/hbase-12439
Commit: 6e2c5d216eb1f4cacad7c5d7ed43b67785cabb67
Parents: 53e5d27
Author: Elliott Clark <[email protected]>
Authored: Fri Dec 18 14:14:25 2015 -0800
Committer: Elliott Clark <[email protected]>
Committed: Mon Dec 21 22:50:38 2015 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/regionserver/wal/WALEdit.java  | 14 +++++++++++++-
 .../apache/hadoop/hbase/wal/WALSplitter.java    | 20 +++++++++++++-------
 2 files changed, 26 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/6e2c5d21/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java
index 3b774ef..cea2ee7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/wal/WALEdit.java
@@ -99,7 +99,7 @@ public class WALEdit implements Writable, HeapSize {
   private final int VERSION_2 = -1;
   private final boolean isReplay;
 
-  private final ArrayList<Cell> cells = new ArrayList<Cell>(1);
+  private ArrayList<Cell> cells = new ArrayList<Cell>(1);
 
   public static final WALEdit EMPTY_WALEDIT = new WALEdit();
 
@@ -170,6 +170,18 @@ public class WALEdit implements Writable, HeapSize {
     return cells;
   }
 
+  /**
+   * This is not thread safe.
+   * This will change the WALEdit and shouldn't be used unless you are sure that nothing
+   * else depends on the contents being immutable.
+   *
+   * @param cells the list of cells that this WALEdit now contains.
+   */
+  @InterfaceAudience.Private
+  public void setCells(ArrayList<Cell> cells) {
+    this.cells = cells;
+  }
+
   public NavigableMap<byte[], Integer> getAndRemoveScopes() {
     NavigableMap<byte[], Integer> result = scopes;
     scopes = null;

http://git-wip-us.apache.org/repos/asf/hbase/blob/6e2c5d21/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java
index 04438fd..8abd950 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WALSplitter.java
@@ -1505,21 +1505,27 @@ public class WALSplitter {
       if (maxSeqIdInStores == null || maxSeqIdInStores.isEmpty()) {
         return;
       }
-      List<Cell> skippedCells = new ArrayList<Cell>();
+      // Create the array list for the cells that aren't filtered.
+      // We make the assumption that most cells will be kept.
+      ArrayList<Cell> keptCells = new ArrayList<Cell>(logEntry.getEdit().getCells().size());
       for (Cell cell : logEntry.getEdit().getCells()) {
-        if (!CellUtil.matchingFamily(cell, WALEdit.METAFAMILY)) {
+        if (CellUtil.matchingFamily(cell, WALEdit.METAFAMILY)) {
+          keptCells.add(cell);
+        } else {
           byte[] family = CellUtil.cloneFamily(cell);
           Long maxSeqId = maxSeqIdInStores.get(family);
           // Do not skip cell even if maxSeqId is null. Maybe we are in a rolling upgrade,
           // or the master was crashed before and we can not get the information.
-          if (maxSeqId != null && maxSeqId.longValue() >= logEntry.getKey().getLogSeqNum()) {
-            skippedCells.add(cell);
+          if (maxSeqId == null || maxSeqId.longValue() < logEntry.getKey().getLogSeqNum()) {
+            keptCells.add(cell);
           }
         }
       }
-      if (!skippedCells.isEmpty()) {
-        logEntry.getEdit().getCells().removeAll(skippedCells);
-      }
+
+      // Anything in the keptCells array list is still live.
+      // So rather than removing the cells from the array list
+      // which would be an O(n^2) operation, we just replace the list
+      logEntry.getEdit().setCells(keptCells);
     }
 
     @Override

Reply via email to