ScanQueryMatcher.java moreRowsMayExistAfterCell Exploit the fact a Scan is a Get Scan. Also save

syuanjiang Wed, 30 Mar 2016 09:04:01 -0700

Revert "HBASE-15392 Single Cell Get reads two HFileBlocks M 
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
 moreRowsMayExistAfterCell Exploit the fact a Scan is a Get Scan. Also save 
compares if no non-default stopRow."


Revert mistaken commit

This reverts commit 7073f69993e446c2a63308f7827639d675a2da58.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a13d6e00
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a13d6e00
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a13d6e00

Branch: refs/heads/hbase-12439
Commit: a13d6e000ddef2c524b641e04f7969950b2b1811
Parents: c49d0ca
Author: stack <[email protected]>
Authored: Tue Mar 22 18:39:22 2016 -0700
Committer: stack <[email protected]>
Committed: Tue Mar 22 18:39:22 2016 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/CellComparator.java |   2 +-
 .../hbase/io/hfile/CombinedBlockCache.java      |  13 +-
 .../hadoop/hbase/io/hfile/HFileWriterImpl.java  |   2 +-
 .../hbase/regionserver/KeyValueScanner.java     |  10 +-
 .../hbase/regionserver/ScanQueryMatcher.java    |  60 ++---
 .../hadoop/hbase/regionserver/StoreScanner.java |  87 +------
 .../hbase/util/CollectionBackedScanner.java     |   3 +-
 .../hbase/regionserver/KeyValueScanFixture.java |   8 +-
 .../regionserver/TestKeyValueScanFixture.java   |   3 +-
 .../hbase/regionserver/TestStoreScanner.java    | 244 +------------------
 10 files changed, 62 insertions(+), 370 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java
----------------------------------------------------------------------
diff --git 
a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java
index 4a5c0b7..a5e26cf 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java
@@ -433,7 +433,7 @@ public class CellComparator implements Comparator<Cell>, 
Serializable {
   /**
    * Used to compare two cells based on the column hint provided. This is 
specifically
    * used when we need to optimize the seeks based on the next indexed key. 
This is an
-   * advanced usage API specifically needed for some optimizations.
+   * advance usage API specifically needed for some optimizations.
    * @param nextIndexedCell the next indexed cell 
    * @param currentCell the cell to be compared
    * @param foff the family offset of the currentCell

http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index 666b357..22bffee 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -63,8 +63,8 @@ public class CombinedBlockCache implements 
ResizableBlockCache, HeapSize {
   @Override
   public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean 
inMemory,
       final boolean cacheDataInL1) {
-    boolean metaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA;
-    if (metaBlock || cacheDataInL1) {
+    boolean isMetaBlock = buf.getBlockType().getCategory() != 
BlockCategory.DATA;
+    if (isMetaBlock || cacheDataInL1) {
       lruCache.cacheBlock(cacheKey, buf, inMemory, cacheDataInL1);
     } else {
       l2Cache.cacheBlock(cacheKey, buf, inMemory, false);
@@ -81,9 +81,12 @@ public class CombinedBlockCache implements 
ResizableBlockCache, HeapSize {
       boolean repeat, boolean updateCacheMetrics) {
     // TODO: is there a hole here, or just awkwardness since in the lruCache 
getBlock
     // we end up calling l2Cache.getBlock.
-    return lruCache.containsBlock(cacheKey)?
-        lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics):
-        l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
+    if (lruCache.containsBlock(cacheKey)) {
+      return lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
+    }
+    Cacheable result = l2Cache.getBlock(cacheKey, caching, repeat, 
updateCacheMetrics);
+
+    return result;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index 9ab46cf..d310d13 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -416,7 +416,7 @@ public class HFileWriterImpl implements HFile.Writer {
     // No opportunity for optimization. Just return right key.
     return right;
   }
-
+  
   /**
    * @param leftArray
    * @param leftOffset

http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
index e26022e..eae713f 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java
@@ -29,9 +29,6 @@ import org.apache.hadoop.hbase.client.Scan;
  * Scanner that returns the next KeyValue.
  */
 @InterfaceAudience.Private
-// TODO: Change name from KeyValueScanner to CellScanner only we already have 
a simple CellScanner
-// so this should be something else altogether, a decoration on our base 
CellScanner. TODO.
-// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0.
 public interface KeyValueScanner extends Shipper {
   /**
    * The byte array represents for NO_NEXT_INDEXED_KEY;
@@ -164,9 +161,8 @@ public interface KeyValueScanner extends Shipper {
   public boolean seekToLastRow() throws IOException;
 
   /**
-   * @return the next key in the index, usually the first key of next block OR 
a key that falls
-   * between last key of current block and first key of next block..
-   * see HFileWriterImpl#getMidpoint, or null if not known.
+   * @return the next key in the index (the key to seek to the next block)
+   * if known, or null otherwise
    */
   public Cell getNextIndexedKey();
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
index 706fc5b..c220b5c 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java
@@ -93,7 +93,7 @@ public class ScanQueryMatcher {
   /* row is not private for tests */
   /** Row the query is on */
   Cell curCell;
-
+  
   /**
    * Oldest put in any of the involved store files
    * Used to decide whether it is ok to delete
@@ -119,7 +119,7 @@ public class ScanQueryMatcher {
    * first column.
    * */
   private boolean hasNullColumn = true;
-
+  
   private RegionCoprocessorHost regionCoprocessorHost= null;
 
   // By default, when hbase.hstore.time.to.purge.deletes is 0ms, a delete
@@ -140,22 +140,22 @@ public class ScanQueryMatcher {
   // currently influencing. This is because Puts, that this delete can
   // influence.  may appear out of order.
   private final long timeToPurgeDeletes;
-
+  
   private final boolean isUserScan;
 
   private final boolean isReversed;
 
   /**
-   * True if we are doing a 'Get' Scan. Every Get is actually a one-row Scan.
-   */
-  private final boolean get;
-
-  /**
    * Construct a QueryMatcher for a scan
+   * @param scan
    * @param scanInfo The store's immutable scan info
+   * @param columns
    * @param scanType Type of the scan
    * @param earliestPutTs Earliest put seen in any of the store files.
-   * @param oldestUnexpiredTS the oldest timestamp we are interested in, based 
on TTL
+   * @param oldestUnexpiredTS the oldest timestamp we are interested in,
+   *  based on TTL
+   * @param regionCoprocessorHost 
+   * @throws IOException 
    */
   public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> 
columns,
       ScanType scanType, long readPointToUse, long earliestPutTs, long 
oldestUnexpiredTS,
@@ -166,7 +166,6 @@ public class ScanQueryMatcher {
     } else {
       this.tr = timeRange;
     }
-    this.get = scan.isGetScan();
     this.rowComparator = scanInfo.getComparator();
     this.regionCoprocessorHost = regionCoprocessorHost;
     this.deletes =  instantiateDeleteTracker();
@@ -235,8 +234,8 @@ public class ScanQueryMatcher {
    * @param now the current server time
    * @param dropDeletesFromRow The inclusive left bound of the range; can be 
EMPTY_START_ROW.
    * @param dropDeletesToRow The exclusive right bound of the range; can be 
EMPTY_END_ROW.
-   * @param regionCoprocessorHost
-   * @throws IOException
+   * @param regionCoprocessorHost 
+   * @throws IOException 
    */
   public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> 
columns,
       long readPointToUse, long earliestPutTs, long oldestUnexpiredTS, long 
now,
@@ -281,7 +280,7 @@ public class ScanQueryMatcher {
    *      caused by a data corruption.
    */
   public MatchCode match(Cell cell) throws IOException {
-    if (filter != null && filter.filterAllRemaining()) {
+      if (filter != null && filter.filterAllRemaining()) {
       return MatchCode.DONE_SCAN;
     }
     if (curCell != null) {
@@ -325,7 +324,7 @@ public class ScanQueryMatcher {
     // check if the cell is expired by cell TTL
     if (HStore.isCellTTLExpired(cell, this.oldestUnexpiredTS, this.now)) {
       return MatchCode.SKIP;
-    }
+    }    
 
     /*
      * The delete logic is pretty complicated now.
@@ -360,10 +359,10 @@ public class ScanQueryMatcher {
         }
         // Can't early out now, because DelFam come before any other keys
       }
-
+     
       if ((!isUserScan)
           && timeToPurgeDeletes > 0
-          && (EnvironmentEdgeManager.currentTime() - timestamp)
+          && (EnvironmentEdgeManager.currentTime() - timestamp) 
             <= timeToPurgeDeletes) {
         return MatchCode.INCLUDE;
       } else if (retainDeletesInOutput || mvccVersion > 
maxReadPointToTrackVersions) {
@@ -504,27 +503,22 @@ public class ScanQueryMatcher {
     }
   }
 
-  /**
-   * @return Returns false if we know there are no more rows to be scanned 
(We've reached the
-   * <code>stopRow</code> or we are scanning on row only because this Scan is 
for a Get, etc.
-   */
   public boolean moreRowsMayExistAfter(Cell kv) {
-    // If a 'get' Scan -- we are doing a Get (every Get is a single-row Scan 
in implementation) --
-    // then we are looking at one row only, the one specified in the Get 
coordinate..so we know
-    // for sure that there are no more rows on this Scan
-    if (this.get) {
-      return false;
+    if (this.isReversed) {
+      if (rowComparator.compareRows(kv, stopRow, 0, stopRow.length) <= 0) {
+        return false;
+      } else {
+        return true;
+      }
     }
-    // If no stopRow, return that there may be more rows. The tests that 
follow depend on a
-    // non-empty, non-default stopRow so this little test below short-circuits 
out doing the
-    // following compares.
-    if (this.stopRow == null || this.stopRow == HConstants.EMPTY_BYTE_ARRAY) {
+    if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) &&
+        rowComparator.compareRows(kv, stopRow, 0, stopRow.length) >= 0) {
+      // KV >= STOPROW
+      // then NO there is nothing left.
+      return false;
+    } else {
       return true;
     }
-    return this.isReversed?
-      rowComparator.compareRows(kv, stopRow, 0, stopRow.length) > 0:
-      Bytes.equals(stopRow, HConstants.EMPTY_END_ROW) ||
-        rowComparator.compareRows(kv, stopRow, 0, stopRow.length) < 0;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
index 66f846a..2f0d284 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
@@ -133,7 +133,7 @@ public class StoreScanner extends 
NonReversedNonLazyKeyValueScanner
   protected List<KeyValueScanner> currentScanners = new 
ArrayList<KeyValueScanner>();
   // flush update lock
   private ReentrantLock flushLock = new ReentrantLock();
-
+  
   protected final long readPt;
 
   // used by the injection framework to test race between StoreScanner 
construction and compaction
@@ -600,12 +600,6 @@ public class StoreScanner extends 
NonReversedNonLazyKeyValueScanner
         continue;
 
       case DONE:
-        // Optimization for Gets! If DONE, no more to get on this row, early 
exit!
-        if (this.scan.isGetScan()) {
-          // Then no more to this row... exit.
-          close(false);// Do all cleanup except heap.close()
-          return 
scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
-        }
         matcher.curCell = null;
         return 
scannerContext.setScannerState(NextState.MORE_VALUES).hasMoreValues();
 
@@ -655,67 +649,18 @@ public class StoreScanner extends 
NonReversedNonLazyKeyValueScanner
     return 
scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
   }
 
-  /**
-   * See if we should actually SEEK or rather just SKIP to the next Cell (see 
HBASE-13109).
-   * This method works together with ColumnTrackers and Filters. 
ColumnTrackers may issue SEEK
-   * hints, such as seek to next column, next row, or seek to an arbitrary 
seek key.
-   * This method intercepts these qcodes and decides whether a seek is the 
most efficient _actual_
-   * way to get us to the requested cell (SEEKs are more expensive than SKIP, 
SKIP, SKIP inside the
-   * current, loaded block).
-   * It does this by looking at the next indexed key of the current HFile. 
This key
-   * is then compared with the _SEEK_ key, where a SEEK key is an artificial 
'last possible key
-   * on the row' (only in here, we avoid actually creating a SEEK key; in the 
compare we work with
-   * the current Cell but compare as though it were a seek key; see down in
-   * matcher.compareKeyForNextRow, etc). If the compare gets us onto the
-   * next block we *_SEEK, otherwise we just INCLUDE or SKIP, and let the 
ColumnTrackers or Filters
-   * go through the next Cell, and so on)
-   *
-   * <p>The ColumnTrackers and Filters must behave correctly in all cases, 
i.e. if they are past the
-   * Cells they care about they must issues a SKIP or SEEK.
-   *
-   * <p>Other notes:
-   * <ul>
-   * <li>Rows can straddle block boundaries</li>
-   * <li>Versions of columns can straddle block boundaries (i.e. column C1 at 
T1 might be in a
-   * different block than column C1 at T2)</li>
-   * <li>We want to SKIP and INCLUDE if the chance is high that we'll find the 
desired Cell after a
-   * few SKIPs...</li>
-   * <li>We want to INCLUDE_AND_SEEK and SEEK when the chance is high that 
we'll be able to seek
-   * past many Cells, especially if we know we need to go to the next 
block.</li>
-   * </ul>
-   * <p>A good proxy (best effort) to determine whether INCLUDE/SKIP is better 
than SEEK is whether
-   * we'll likely end up seeking to the next block (or past the next block) to 
get our next column.
-   * Example:
-   * <pre>
-   * |    BLOCK 1              |     BLOCK 2                   |
-   * |  r1/c1, r1/c2, r1/c3    |    r1/c4, r1/c5, r2/c1        |
-   *                                   ^         ^
-   *                                   |         |
-   *                           Next Index Key   SEEK_NEXT_ROW (before r2/c1)
-   *
-   *
-   * |    BLOCK 1                       |     BLOCK 2                      |
-   * |  r1/c1/t5, r1/c1/t4, r1/c1/t3    |    r1/c1/t2, r1/c1/T1, r1/c2/T3  |
-   *                                            ^              ^
-   *                                            |              |
-   *                                    Next Index Key        SEEK_NEXT_COL
-   * </pre>
-   * Now imagine we want columns c1 and c3 (see first diagram above), the 
'Next Index Key' of r1/c4
-   * is > r1/c3 so we should seek to get to the c1 on the next row, r2. In 
second case, say we only
-   * want one version of c1, after we have it, a SEEK_COL will be issued to 
get to c2. Looking at
-   * the 'Next Index Key', it would land us in the next block, so we should 
SEEK. In other scenarios
-   * where the SEEK will not land us in the next block, it is very likely 
better to issues a series
-   * of SKIPs.
+  /*
+   * See if we should actually SEEK or rather just SKIP to the next Cell.
+   * (see HBASE-13109)
    */
-  @VisibleForTesting
-  protected ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode 
qcode, Cell cell) {
+  private ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode 
qcode, Cell cell) {
     switch(qcode) {
     case INCLUDE_AND_SEEK_NEXT_COL:
     case SEEK_NEXT_COL:
     {
       Cell nextIndexedKey = getNextIndexedKey();
       if (nextIndexedKey != null && nextIndexedKey != 
KeyValueScanner.NO_NEXT_INDEXED_KEY
-          && matcher.compareKeyForNextColumn(nextIndexedKey, cell) > 0) {
+          && matcher.compareKeyForNextColumn(nextIndexedKey, cell) >= 0) {
         return qcode == MatchCode.SEEK_NEXT_COL ? MatchCode.SKIP : 
MatchCode.INCLUDE;
       }
       break;
@@ -723,16 +668,10 @@ public class StoreScanner extends 
NonReversedNonLazyKeyValueScanner
     case INCLUDE_AND_SEEK_NEXT_ROW:
     case SEEK_NEXT_ROW:
     {
-      // If it is a Get Scan, then we know that we are done with this row; 
there are no more
-      // rows beyond the current one: don't try to optimize. We are DONE. 
Return the *_NEXT_ROW
-      // qcode as is. When the caller gets these flags on a Get Scan, it knows 
it can shut down the
-      // Scan.
-      if (!this.scan.isGetScan()) {
-        Cell nextIndexedKey = getNextIndexedKey();
-        if (nextIndexedKey != null && nextIndexedKey != 
KeyValueScanner.NO_NEXT_INDEXED_KEY
-            && matcher.compareKeyForNextRow(nextIndexedKey, cell) > 0) {
-          return qcode == MatchCode.SEEK_NEXT_ROW ? MatchCode.SKIP : 
MatchCode.INCLUDE;
-        }
+      Cell nextIndexedKey = getNextIndexedKey();
+      if (nextIndexedKey != null && nextIndexedKey != 
KeyValueScanner.NO_NEXT_INDEXED_KEY
+          && matcher.compareKeyForNextRow(nextIndexedKey, cell) >= 0) {
+        return qcode == MatchCode.SEEK_NEXT_ROW ? MatchCode.SKIP : 
MatchCode.INCLUDE;
       }
       break;
     }
@@ -870,10 +809,10 @@ public class StoreScanner extends 
NonReversedNonLazyKeyValueScanner
     // check the var without any lock. Suppose even if we see the old
     // value here still it is ok to continue because we will not be resetting
     // the heap but will continue with the referenced memstore's snapshot. For 
compactions
-    // any way we don't need the updateReaders at all to happen as we still 
continue with
+    // any way we don't need the updateReaders at all to happen as we still 
continue with 
     // the older files
     if (flushed) {
-      // If there is a flush and the current scan is notified on the flush 
ensure that the
+      // If there is a flush and the current scan is notified on the flush 
ensure that the 
       // scan's heap gets reset and we do a seek on the newly flushed file.
       if(!this.closing) {
         this.lastTop = this.peek();
@@ -903,7 +842,7 @@ public class StoreScanner extends 
NonReversedNonLazyKeyValueScanner
     if (scanners.isEmpty()) return;
     int storeFileScannerCount = scanners.size();
     CountDownLatch latch = new CountDownLatch(storeFileScannerCount);
-    List<ParallelSeekHandler> handlers =
+    List<ParallelSeekHandler> handlers = 
         new ArrayList<ParallelSeekHandler>(storeFileScannerCount);
     for (KeyValueScanner scanner : scanners) {
       if (scanner instanceof StoreFileScanner) {

http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java
index 4720880..9fc068f 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java
@@ -30,7 +30,8 @@ import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.regionserver.NonReversedNonLazyKeyValueScanner;
 
 /**
- * Utility scanner that wraps a sortable collection and serves as a 
KeyValueScanner.
+ * Utility scanner that wraps a sortable collection and serves
+ * as a KeyValueScanner.
  */
 @InterfaceAudience.Private
 public class CollectionBackedScanner extends NonReversedNonLazyKeyValueScanner 
{

http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java
index a4e7f9b..3f87a00 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java
@@ -22,7 +22,6 @@ package org.apache.hadoop.hbase.regionserver;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.util.CollectionBackedScanner;
@@ -34,8 +33,9 @@ import org.apache.hadoop.hbase.util.CollectionBackedScanner;
  * to be a store file scanner.
  */
 public class KeyValueScanFixture extends CollectionBackedScanner {
-  public KeyValueScanFixture(CellComparator comparator, Cell... cells) {
-    super(comparator, cells);
+  public KeyValueScanFixture(CellComparator comparator,
+                             KeyValue... incData) {
+    super(comparator, incData);
   }
 
   public static List<KeyValueScanner> scanFixture(KeyValue[] ... kvArrays) {
@@ -45,4 +45,4 @@ public class KeyValueScanFixture extends 
CollectionBackedScanner {
     }
     return scanners;
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java
index 0e96682..a8c2c65 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java
@@ -46,7 +46,8 @@ public class TestKeyValueScanFixture extends TestCase {
         KeyValueTestUtil.create("RowB", "family", "qf1",
             10, KeyValue.Type.Put, "value-10")
     };
-    KeyValueScanner scan = new KeyValueScanFixture(CellComparator.COMPARATOR, 
kvs);
+    KeyValueScanner scan = new KeyValueScanFixture(
+        CellComparator.COMPARATOR, kvs);
 
     KeyValue kv = KeyValueUtil.createFirstOnRow(Bytes.toBytes("RowA"));
     // should seek to this:

http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java
index 4c594b0..92c85aa 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java
@@ -20,7 +20,6 @@
 package org.apache.hadoop.hbase.regionserver;
 
 import static 
org.apache.hadoop.hbase.regionserver.KeyValueScanFixture.scanFixture;
-import static org.junit.Assert.*;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -28,21 +27,16 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.NavigableSet;
 import java.util.TreeSet;
-import java.util.concurrent.atomic.AtomicInteger;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.CategoryBasedTimeout;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellComparator;
-import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.KeepDeletedCells;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.KeyValueTestUtil;
-import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
@@ -59,113 +53,16 @@ import org.junit.rules.TestRule;
 // Can't be small as it plays with EnvironmentEdgeManager
 @Category({RegionServerTests.class, MediumTests.class})
 public class TestStoreScanner {
-  private static final Log LOG = LogFactory.getLog(TestStoreScanner.class);
   @Rule public TestName name = new TestName();
   @Rule public final TestRule timeout = 
CategoryBasedTimeout.builder().withTimeout(this.getClass()).
       withLookingForStuckThread(true).build();
   private static final String CF_STR = "cf";
-  private static final byte [] CF = Bytes.toBytes(CF_STR);
+  final byte [] CF = Bytes.toBytes(CF_STR);
   static Configuration CONF = HBaseConfiguration.create();
   private ScanInfo scanInfo = new ScanInfo(CONF, CF, 0, Integer.MAX_VALUE,
       Long.MAX_VALUE, KeepDeletedCells.FALSE, 0, CellComparator.COMPARATOR);
   private ScanType scanType = ScanType.USER_SCAN;
 
-  /**
-   * From here on down, we have a bunch of defines and specific CELL_GRID of 
Cells. The
-   * CELL_GRID then has a Scanner that can fake out 'block' transitions. All 
this elaborate
-   * setup is for tests that ensure we don't overread, and that the
-   * {@link 
StoreScanner#optimize(org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode,
-   * Cell)} is not overly enthusiastic.
-   */
-  private static final byte [] ZERO = new byte [] {'0'};
-  private static final byte [] ZERO_POINT_ZERO = new byte [] {'0', '.', '0'};
-  private static final byte [] ONE = new byte [] {'1'};
-  private static final byte [] TWO = new byte [] {'2'};
-  private static final byte [] TWO_POINT_TWO = new byte [] {'2', '.', '2'};
-  private static final byte [] THREE = new byte [] {'3'};
-  private static final byte [] FOUR = new byte [] {'4'};
-  private static final byte [] FIVE = new byte [] {'5'};
-  private static final byte [] VALUE = new byte [] {'v'};
-  private static final int CELL_GRID_BLOCK2_BOUNDARY = 4;
-  private static final int CELL_GRID_BLOCK3_BOUNDARY = 11;
-  private static final int CELL_GRID_BLOCK4_BOUNDARY = 15;
-  private static final int CELL_GRID_BLOCK5_BOUNDARY = 19;
-
-  /**
-   * Five rows by four columns distinguished by column qualifier (column 
qualifier is one of the
-   * four rows... ONE, TWO, etc.). Exceptions are a weird row after TWO; it is 
TWO_POINT_TWO.
-   * And then row FOUR has five columns finishing w/ row FIVE having a single 
column.
-   * We will use this to test scan does the right thing as it
-   * we do Gets, StoreScanner#optimize, and what we do on (faked) block 
boundaries.
-   */
-  private static final Cell [] CELL_GRID = new Cell [] {
-    CellUtil.createCell(ONE, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE),
-    CellUtil.createCell(ONE, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE),
-    CellUtil.createCell(ONE, CF, THREE, 1L, KeyValue.Type.Put.getCode(), 
VALUE),
-    CellUtil.createCell(ONE, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE),
-    // Offset 4 CELL_GRID_BLOCK2_BOUNDARY
-    CellUtil.createCell(TWO, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE),
-    CellUtil.createCell(TWO, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE),
-    CellUtil.createCell(TWO, CF, THREE, 1L, KeyValue.Type.Put.getCode(), 
VALUE),
-    CellUtil.createCell(TWO, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE),
-    CellUtil.createCell(TWO_POINT_TWO, CF, ZERO, 1L, 
KeyValue.Type.Put.getCode(), VALUE),
-    CellUtil.createCell(TWO_POINT_TWO, CF, ZERO_POINT_ZERO, 1L, 
KeyValue.Type.Put.getCode(), VALUE),
-    CellUtil.createCell(TWO_POINT_TWO, CF, FIVE, 1L, 
KeyValue.Type.Put.getCode(), VALUE),
-    // Offset 11! CELL_GRID_BLOCK3_BOUNDARY
-    CellUtil.createCell(THREE, CF, ONE, 1L, KeyValue.Type.Put.getCode(), 
VALUE),
-    CellUtil.createCell(THREE, CF, TWO, 1L, KeyValue.Type.Put.getCode(), 
VALUE),
-    CellUtil.createCell(THREE, CF, THREE, 1L, KeyValue.Type.Put.getCode(), 
VALUE),
-    CellUtil.createCell(THREE, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), 
VALUE),
-    // Offset 15 CELL_GRID_BLOCK4_BOUNDARY
-    CellUtil.createCell(FOUR, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE),
-    CellUtil.createCell(FOUR, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE),
-    CellUtil.createCell(FOUR, CF, THREE, 1L, KeyValue.Type.Put.getCode(), 
VALUE),
-    CellUtil.createCell(FOUR, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), 
VALUE),
-    // Offset 19 CELL_GRID_BLOCK5_BOUNDARY
-    CellUtil.createCell(FOUR, CF, FIVE, 1L, KeyValue.Type.Put.getCode(), 
VALUE),
-    CellUtil.createCell(FIVE, CF, ZERO, 1L, KeyValue.Type.Put.getCode(), 
VALUE),
-  };
-
-  /**
-   * A StoreScanner for our CELL_GRID above. Fakes the block transitions. Does 
counts of
-   * calls to optimize and counts of when optimize actually did an optimize.
-   */
-  private static class CellGridStoreScanner extends StoreScanner {
-    // Count of how often optimize is called and of how often it does an 
optimize.
-    final AtomicInteger count = new AtomicInteger(0);
-    final AtomicInteger optimization = new AtomicInteger(0);
-
-    CellGridStoreScanner(final Scan scan, ScanInfo scanInfo, ScanType scanType)
-    throws IOException {
-      super(scan, scanInfo, scanType, scan.getFamilyMap().get(CF),
-        Arrays.<KeyValueScanner>asList(
-          new KeyValueScanner[] {new 
KeyValueScanFixture(CellComparator.COMPARATOR, CELL_GRID)}));
-    }
-
-    protected ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode 
qcode, Cell cell) {
-      count.incrementAndGet();
-      ScanQueryMatcher.MatchCode after = super.optimize(qcode, cell);
-      LOG.info("Cell=" + cell + ", nextIndex=" + 
CellUtil.toString(getNextIndexedKey(), false) +
-          ", before=" + qcode + ", after=" + after);
-      if (qcode != after) {
-        optimization.incrementAndGet();
-      }
-      return after;
-    };
-
-    @Override
-    public Cell getNextIndexedKey() {
-      // Fake block boundaries by having index of next block change as we go 
through scan.
-      return count.get() > CELL_GRID_BLOCK4_BOUNDARY?
-          CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK5_BOUNDARY]):
-            count.get() > CELL_GRID_BLOCK3_BOUNDARY?
-                
CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK4_BOUNDARY]):
-                  count.get() > CELL_GRID_BLOCK2_BOUNDARY?
-                      
CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK3_BOUNDARY]):
-                        
CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK2_BOUNDARY]);
-    }
-  };
-
   /*
    * Test utility for building a NavigableSet for scanners.
    * @param strCols
@@ -181,145 +78,6 @@ public class TestStoreScanner {
   }
 
   @Test
-  public void testFullRowGetDoesNotOverreadWhenRowInsideOneBlock() throws 
IOException {
-    // Do a Get against row two. Row two is inside a block that starts with 
row TWO but ends with
-    // row TWO_POINT_TWO. We should read one block only.
-    Get get = new Get(TWO);
-    Scan scan = new Scan(get);
-    CellGridStoreScanner scanner = new CellGridStoreScanner(scan, 
this.scanInfo, this.scanType);
-    try {
-      List<Cell> results = new ArrayList<Cell>();
-      while (scanner.next(results)) {
-        continue;
-      }
-      // Should be four results of column 1 (though there are 5 rows in the 
CELL_GRID -- the
-      // TWO_POINT_TWO row does not have a a column ONE.
-      Assert.assertEquals(4, results.size());
-      // We should have gone the optimize route 5 times totally... an INCLUDE 
for the four cells
-      // in the row plus the DONE on the end.
-      Assert.assertEquals(5, scanner.count.get());
-      // For a full row Get, there should be no opportunity for scanner 
optimization.
-      Assert.assertEquals(0, scanner.optimization.get());
-    } finally {
-      scanner.close();
-    }
-  }
-
-  @Test
-  public void testFullRowSpansBlocks() throws IOException {
-    // Do a Get against row FOUR. It spans two blocks.
-    Get get = new Get(FOUR);
-    Scan scan = new Scan(get);
-    CellGridStoreScanner scanner = new CellGridStoreScanner(scan, 
this.scanInfo, this.scanType);
-    try {
-      List<Cell> results = new ArrayList<Cell>();
-      while (scanner.next(results)) {
-        continue;
-      }
-      // Should be four results of column 1 (though there are 5 rows in the 
CELL_GRID -- the
-      // TWO_POINT_TWO row does not have a a column ONE.
-      Assert.assertEquals(5, results.size());
-      // We should have gone the optimize route 6 times totally... an INCLUDE 
for the five cells
-      // in the row plus the DONE on the end.
-      Assert.assertEquals(6, scanner.count.get());
-      // For a full row Get, there should be no opportunity for scanner 
optimization.
-      Assert.assertEquals(0, scanner.optimization.get());
-    } finally {
-      scanner.close();
-    }
-  }
-
-  /**
-   * Test optimize in StoreScanner. Test that we skip to the next 'block' when 
we it makes sense
-   * reading the block 'index'.
-   * @throws IOException
-   */
-  @Test
-  public void testOptimize() throws IOException {
-    Scan scan = new Scan();
-    // A scan that just gets the first qualifier on each row of the CELL_GRID
-    scan.addColumn(CF, ONE);
-    CellGridStoreScanner scanner = new CellGridStoreScanner(scan, 
this.scanInfo, this.scanType);
-    try {
-      List<Cell> results = new ArrayList<Cell>();
-      while (scanner.next(results)) {
-        continue;
-      }
-      // Should be four results of column 1 (though there are 5 rows in the 
CELL_GRID -- the
-      // TWO_POINT_TWO row does not have a a column ONE.
-      Assert.assertEquals(4, results.size());
-      for (Cell cell: results) {
-        assertTrue(Bytes.equals(ONE, 0, ONE.length,
-            cell.getQualifierArray(), cell.getQualifierOffset(), 
cell.getQualifierLength()));
-      }
-      Assert.assertTrue("Optimize should do some optimizations", 
scanner.optimization.get() > 0);
-    } finally {
-      scanner.close();
-    }
-  }
-
-  /**
-   * Ensure the optimize Scan method in StoreScanner does not get in the way 
of a Get doing minimum
-   * work... seeking to start of block and then SKIPPING until we find the 
wanted Cell.
-   * This 'simple' scenario mimics case of all Cells fitting inside a single 
HFileBlock.
-   * See HBASE-15392. This test is a little cryptic. Takes a bit of staring to 
figure what it up to.
-   * @throws IOException
-   */
-  @Test
-  public void testOptimizeAndGet() throws IOException {
-    // First test a Get of two columns in the row R2. Every Get is a Scan. Get 
columns named
-    // R2 and R3.
-    Get get = new Get(TWO);
-    get.addColumn(CF, TWO);
-    get.addColumn(CF, THREE);
-    Scan scan = new Scan(get);
-    CellGridStoreScanner scanner = new CellGridStoreScanner(scan, 
this.scanInfo, this.scanType);
-    try {
-      List<Cell> results = new ArrayList<Cell>();
-      // For a Get there should be no more next's after the first call.
-      Assert.assertEquals(false, scanner.next(results));
-      // Should be one result only.
-      Assert.assertEquals(2, results.size());
-      // And we should have gone through optimize twice only.
-      Assert.assertEquals("First qcode is SEEK_NEXT_COL and second 
INCLUDE_AND_SEEK_NEXT_ROW",
-        3, scanner.count.get());
-    } finally {
-      scanner.close();
-    }
-  }
-
-  /**
-   * Ensure that optimize does not cause the Get to do more seeking than 
required. Optimize
-   * (see HBASE-15392) was causing us to seek all Cells in a block when a Get 
Scan if the next block
-   * index/start key was a different row to the current one. A bug. We'd call 
next too often
-   * because we had to exhaust all Cells in the current row making us load the 
next block just to
-   * discard what we read there. This test is a little cryptic. Takes a bit of 
staring to figure
-   * what it up to.
-   * @throws IOException
-   */
-  @Test
-  public void testOptimizeAndGetWithFakedNextBlockIndexStart() throws 
IOException {
-    // First test a Get of second column in the row R2. Every Get is a Scan. 
Second column has a
-    // qualifier of R2.
-    Get get = new Get(THREE);
-    get.addColumn(CF, TWO);
-    Scan scan = new Scan(get);
-    CellGridStoreScanner scanner = new CellGridStoreScanner(scan, 
this.scanInfo, this.scanType);
-    try {
-      List<Cell> results = new ArrayList<Cell>();
-      // For a Get there should be no more next's after the first call.
-      Assert.assertEquals(false, scanner.next(results));
-      // Should be one result only.
-      Assert.assertEquals(1, results.size());
-      // And we should have gone through optimize twice only.
-      Assert.assertEquals("First qcode is SEEK_NEXT_COL and second 
INCLUDE_AND_SEEK_NEXT_ROW",
-        2, scanner.count.get());
-    } finally {
-      scanner.close();
-    }
-  }
-
-  @Test
   public void testScanTimeRange() throws IOException {
     String r1 = "R1";
     // returns only 1 of these 2 even though same timestamp

[16/50] [abbrv] hbase git commit: Revert "HBASE-15392 Single Cell Get reads two HFileBlocks M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java moreRowsMayExistAfterCell Exploit the fact a Scan is a Get Scan. Also save

Reply via email to