Author: stack
Date: Fri Jun 10 19:21:41 2011
New Revision: 1134419

URL: http://svn.apache.org/viewvc?rev=1134419&view=rev
Log:
HBASE-3855 Performance degradation of memstore because reseek is linear

Modified:
    hbase/branches/0.90/CHANGES.txt
    
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java

Modified: hbase/branches/0.90/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hbase/branches/0.90/CHANGES.txt?rev=1134419&r1=1134418&r2=1134419&view=diff
==============================================================================
--- hbase/branches/0.90/CHANGES.txt (original)
+++ hbase/branches/0.90/CHANGES.txt Fri Jun 10 19:21:41 2011
@@ -35,6 +35,8 @@ Release 0.90.4 - Unreleased
    HBASE-3919  More places output binary data to text (Dave Latham)
    HBASE-3906  When HMaster is running,there are a lot of RegionLoad instances
                far greater than the regions),it has risk of OOME (Jian Zhang)
+   HBASE-3855  Performance degradation of memstore because reseek is linear
+               (dhruba borthakur)
 
 Release 0.90.3 - May 19th, 2011
   BUG FIXES

Modified: 
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java
URL: 
http://svn.apache.org/viewvc/hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java?rev=1134419&r1=1134418&r2=1134419&view=diff
==============================================================================
--- 
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java
 (original)
+++ 
hbase/branches/0.90/src/main/java/org/apache/hadoop/hbase/regionserver/MemStore.java
 Fri Jun 10 19:21:41 2011
@@ -62,6 +62,9 @@ public class MemStore implements HeapSiz
     "hbase.hregion.memstore.mslab.enabled";
   private static final boolean USEMSLAB_DEFAULT = false;
 
+  static final String RESEEKMAX_KEY =
+    "hbase.hregion.memstore.reseek.maxkeys";
+  private static final int RESEEKMAX_DEFAULT = 32;
 
   private Configuration conf;
 
@@ -93,6 +96,11 @@ public class MemStore implements HeapSiz
   
   MemStoreLAB allocator;
 
+  // if a reseek has to scan over more than these number of keys, then
+  // it morphs into a seek. A seek does a tree map-search while 
+  // reseek does a linear scan.
+  int reseekNumKeys;
+
   /**
    * Default constructor. Used for tests.
    */
@@ -121,6 +129,7 @@ public class MemStore implements HeapSiz
     } else {
       this.allocator = null;
     }
+    this.reseekNumKeys = conf.getInt(RESEEKMAX_KEY, RESEEKMAX_DEFAULT);
   }
 
   void dump() {
@@ -643,6 +652,9 @@ public class MemStore implements HeapSiz
     Iterator<KeyValue> kvsetIt;
     Iterator<KeyValue> snapshotIt;
 
+    // number of iterations in this reseek operation
+    int numIterReseek;
+    
     /*
     Some notes...
 
@@ -678,6 +690,10 @@ public class MemStore implements HeapSiz
           // keep it.
           ret = v;
         }
+        numIterReseek--;
+        if (numIterReseek == 0) {
+          break;
+        }
       }
       return ret;
     }
@@ -687,6 +703,7 @@ public class MemStore implements HeapSiz
         close();
         return false;
       }
+      numIterReseek = 0;
 
       // kvset and snapshot will never be empty.
       // if tailSet cant find anything, SS is empty (not null).
@@ -715,14 +732,27 @@ public class MemStore implements HeapSiz
 
     @Override
     public boolean reseek(KeyValue key) {
+      numIterReseek = reseekNumKeys;
       while (kvsetNextRow != null &&
           comparator.compare(kvsetNextRow, key) < 0) {
         kvsetNextRow = getNext(kvsetIt);
+        // if we scanned enough entries but still not able to find the
+        // kv we are looking for, better cut our costs and do a tree
+        // scan using seek.
+        if (kvsetNextRow == null && numIterReseek == 0) {
+          return seek(key);
+        }
       }
 
       while (snapshotNextRow != null &&
           comparator.compare(snapshotNextRow, key) < 0) {
         snapshotNextRow = getNext(snapshotIt);
+        // if we scanned enough entries but still not able to find the
+        // kv we are looking for, better cut our costs and do a tree
+        // scan using seek.
+        if (snapshotNextRow == null && numIterReseek == 0) {
+          return seek(key);
+        }
       }
       return (kvsetNextRow != null || snapshotNextRow != null);
     }


Reply via email to