Revert "HBASE-15392 Single Cell Get reads two HFileBlocks M hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java moreRowsMayExistAfterCell Exploit the fact a Scan is a Get Scan. Also save compares if no non-default stopRow."
Revert mistaken commit This reverts commit 7073f69993e446c2a63308f7827639d675a2da58. Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/a13d6e00 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/a13d6e00 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/a13d6e00 Branch: refs/heads/hbase-12439 Commit: a13d6e000ddef2c524b641e04f7969950b2b1811 Parents: c49d0ca Author: stack <[email protected]> Authored: Tue Mar 22 18:39:22 2016 -0700 Committer: stack <[email protected]> Committed: Tue Mar 22 18:39:22 2016 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hbase/CellComparator.java | 2 +- .../hbase/io/hfile/CombinedBlockCache.java | 13 +- .../hadoop/hbase/io/hfile/HFileWriterImpl.java | 2 +- .../hbase/regionserver/KeyValueScanner.java | 10 +- .../hbase/regionserver/ScanQueryMatcher.java | 60 ++--- .../hadoop/hbase/regionserver/StoreScanner.java | 87 +------ .../hbase/util/CollectionBackedScanner.java | 3 +- .../hbase/regionserver/KeyValueScanFixture.java | 8 +- .../regionserver/TestKeyValueScanFixture.java | 3 +- .../hbase/regionserver/TestStoreScanner.java | 244 +------------------ 10 files changed, 62 insertions(+), 370 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java ---------------------------------------------------------------------- diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java index 4a5c0b7..a5e26cf 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/CellComparator.java @@ -433,7 +433,7 @@ public class CellComparator implements Comparator<Cell>, Serializable { /** * Used to compare two cells based on the column hint provided. This is specifically * used when we need to optimize the seeks based on the next indexed key. This is an - * advanced usage API specifically needed for some optimizations. + * advance usage API specifically needed for some optimizations. * @param nextIndexedCell the next indexed cell * @param currentCell the cell to be compared * @param foff the family offset of the currentCell http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java index 666b357..22bffee 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java @@ -63,8 +63,8 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize { @Override public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory, final boolean cacheDataInL1) { - boolean metaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA; - if (metaBlock || cacheDataInL1) { + boolean isMetaBlock = buf.getBlockType().getCategory() != BlockCategory.DATA; + if (isMetaBlock || cacheDataInL1) { lruCache.cacheBlock(cacheKey, buf, inMemory, cacheDataInL1); } else { l2Cache.cacheBlock(cacheKey, buf, inMemory, false); @@ -81,9 +81,12 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize { boolean repeat, boolean updateCacheMetrics) { // TODO: is there a hole here, or just awkwardness since in the lruCache getBlock // we end up calling l2Cache.getBlock. - return lruCache.containsBlock(cacheKey)? - lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics): - l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics); + if (lruCache.containsBlock(cacheKey)) { + return lruCache.getBlock(cacheKey, caching, repeat, updateCacheMetrics); + } + Cacheable result = l2Cache.getBlock(cacheKey, caching, repeat, updateCacheMetrics); + + return result; } @Override http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java index 9ab46cf..d310d13 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java @@ -416,7 +416,7 @@ public class HFileWriterImpl implements HFile.Writer { // No opportunity for optimization. Just return right key. return right; } - + /** * @param leftArray * @param leftOffset http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java index e26022e..eae713f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueScanner.java @@ -29,9 +29,6 @@ import org.apache.hadoop.hbase.client.Scan; * Scanner that returns the next KeyValue. */ @InterfaceAudience.Private -// TODO: Change name from KeyValueScanner to CellScanner only we already have a simple CellScanner -// so this should be something else altogether, a decoration on our base CellScanner. TODO. -// This class shows in CPs so do it all in one swell swoop. HBase-2.0.0. public interface KeyValueScanner extends Shipper { /** * The byte array represents for NO_NEXT_INDEXED_KEY; @@ -164,9 +161,8 @@ public interface KeyValueScanner extends Shipper { public boolean seekToLastRow() throws IOException; /** - * @return the next key in the index, usually the first key of next block OR a key that falls - * between last key of current block and first key of next block.. - * see HFileWriterImpl#getMidpoint, or null if not known. + * @return the next key in the index (the key to seek to the next block) + * if known, or null otherwise */ public Cell getNextIndexedKey(); -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java index 706fc5b..c220b5c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/ScanQueryMatcher.java @@ -93,7 +93,7 @@ public class ScanQueryMatcher { /* row is not private for tests */ /** Row the query is on */ Cell curCell; - + /** * Oldest put in any of the involved store files * Used to decide whether it is ok to delete @@ -119,7 +119,7 @@ public class ScanQueryMatcher { * first column. * */ private boolean hasNullColumn = true; - + private RegionCoprocessorHost regionCoprocessorHost= null; // By default, when hbase.hstore.time.to.purge.deletes is 0ms, a delete @@ -140,22 +140,22 @@ public class ScanQueryMatcher { // currently influencing. This is because Puts, that this delete can // influence. may appear out of order. private final long timeToPurgeDeletes; - + private final boolean isUserScan; private final boolean isReversed; /** - * True if we are doing a 'Get' Scan. Every Get is actually a one-row Scan. - */ - private final boolean get; - - /** * Construct a QueryMatcher for a scan + * @param scan * @param scanInfo The store's immutable scan info + * @param columns * @param scanType Type of the scan * @param earliestPutTs Earliest put seen in any of the store files. - * @param oldestUnexpiredTS the oldest timestamp we are interested in, based on TTL + * @param oldestUnexpiredTS the oldest timestamp we are interested in, + * based on TTL + * @param regionCoprocessorHost + * @throws IOException */ public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> columns, ScanType scanType, long readPointToUse, long earliestPutTs, long oldestUnexpiredTS, @@ -166,7 +166,6 @@ public class ScanQueryMatcher { } else { this.tr = timeRange; } - this.get = scan.isGetScan(); this.rowComparator = scanInfo.getComparator(); this.regionCoprocessorHost = regionCoprocessorHost; this.deletes = instantiateDeleteTracker(); @@ -235,8 +234,8 @@ public class ScanQueryMatcher { * @param now the current server time * @param dropDeletesFromRow The inclusive left bound of the range; can be EMPTY_START_ROW. * @param dropDeletesToRow The exclusive right bound of the range; can be EMPTY_END_ROW. - * @param regionCoprocessorHost - * @throws IOException + * @param regionCoprocessorHost + * @throws IOException */ public ScanQueryMatcher(Scan scan, ScanInfo scanInfo, NavigableSet<byte[]> columns, long readPointToUse, long earliestPutTs, long oldestUnexpiredTS, long now, @@ -281,7 +280,7 @@ public class ScanQueryMatcher { * caused by a data corruption. */ public MatchCode match(Cell cell) throws IOException { - if (filter != null && filter.filterAllRemaining()) { + if (filter != null && filter.filterAllRemaining()) { return MatchCode.DONE_SCAN; } if (curCell != null) { @@ -325,7 +324,7 @@ public class ScanQueryMatcher { // check if the cell is expired by cell TTL if (HStore.isCellTTLExpired(cell, this.oldestUnexpiredTS, this.now)) { return MatchCode.SKIP; - } + } /* * The delete logic is pretty complicated now. @@ -360,10 +359,10 @@ public class ScanQueryMatcher { } // Can't early out now, because DelFam come before any other keys } - + if ((!isUserScan) && timeToPurgeDeletes > 0 - && (EnvironmentEdgeManager.currentTime() - timestamp) + && (EnvironmentEdgeManager.currentTime() - timestamp) <= timeToPurgeDeletes) { return MatchCode.INCLUDE; } else if (retainDeletesInOutput || mvccVersion > maxReadPointToTrackVersions) { @@ -504,27 +503,22 @@ public class ScanQueryMatcher { } } - /** - * @return Returns false if we know there are no more rows to be scanned (We've reached the - * <code>stopRow</code> or we are scanning on row only because this Scan is for a Get, etc. - */ public boolean moreRowsMayExistAfter(Cell kv) { - // If a 'get' Scan -- we are doing a Get (every Get is a single-row Scan in implementation) -- - // then we are looking at one row only, the one specified in the Get coordinate..so we know - // for sure that there are no more rows on this Scan - if (this.get) { - return false; + if (this.isReversed) { + if (rowComparator.compareRows(kv, stopRow, 0, stopRow.length) <= 0) { + return false; + } else { + return true; + } } - // If no stopRow, return that there may be more rows. The tests that follow depend on a - // non-empty, non-default stopRow so this little test below short-circuits out doing the - // following compares. - if (this.stopRow == null || this.stopRow == HConstants.EMPTY_BYTE_ARRAY) { + if (!Bytes.equals(stopRow , HConstants.EMPTY_END_ROW) && + rowComparator.compareRows(kv, stopRow, 0, stopRow.length) >= 0) { + // KV >= STOPROW + // then NO there is nothing left. + return false; + } else { return true; } - return this.isReversed? - rowComparator.compareRows(kv, stopRow, 0, stopRow.length) > 0: - Bytes.equals(stopRow, HConstants.EMPTY_END_ROW) || - rowComparator.compareRows(kv, stopRow, 0, stopRow.length) < 0; } /** http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java index 66f846a..2f0d284 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java @@ -133,7 +133,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner protected List<KeyValueScanner> currentScanners = new ArrayList<KeyValueScanner>(); // flush update lock private ReentrantLock flushLock = new ReentrantLock(); - + protected final long readPt; // used by the injection framework to test race between StoreScanner construction and compaction @@ -600,12 +600,6 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner continue; case DONE: - // Optimization for Gets! If DONE, no more to get on this row, early exit! - if (this.scan.isGetScan()) { - // Then no more to this row... exit. - close(false);// Do all cleanup except heap.close() - return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); - } matcher.curCell = null; return scannerContext.setScannerState(NextState.MORE_VALUES).hasMoreValues(); @@ -655,67 +649,18 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner return scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues(); } - /** - * See if we should actually SEEK or rather just SKIP to the next Cell (see HBASE-13109). - * This method works together with ColumnTrackers and Filters. ColumnTrackers may issue SEEK - * hints, such as seek to next column, next row, or seek to an arbitrary seek key. - * This method intercepts these qcodes and decides whether a seek is the most efficient _actual_ - * way to get us to the requested cell (SEEKs are more expensive than SKIP, SKIP, SKIP inside the - * current, loaded block). - * It does this by looking at the next indexed key of the current HFile. This key - * is then compared with the _SEEK_ key, where a SEEK key is an artificial 'last possible key - * on the row' (only in here, we avoid actually creating a SEEK key; in the compare we work with - * the current Cell but compare as though it were a seek key; see down in - * matcher.compareKeyForNextRow, etc). If the compare gets us onto the - * next block we *_SEEK, otherwise we just INCLUDE or SKIP, and let the ColumnTrackers or Filters - * go through the next Cell, and so on) - * - * <p>The ColumnTrackers and Filters must behave correctly in all cases, i.e. if they are past the - * Cells they care about they must issues a SKIP or SEEK. - * - * <p>Other notes: - * <ul> - * <li>Rows can straddle block boundaries</li> - * <li>Versions of columns can straddle block boundaries (i.e. column C1 at T1 might be in a - * different block than column C1 at T2)</li> - * <li>We want to SKIP and INCLUDE if the chance is high that we'll find the desired Cell after a - * few SKIPs...</li> - * <li>We want to INCLUDE_AND_SEEK and SEEK when the chance is high that we'll be able to seek - * past many Cells, especially if we know we need to go to the next block.</li> - * </ul> - * <p>A good proxy (best effort) to determine whether INCLUDE/SKIP is better than SEEK is whether - * we'll likely end up seeking to the next block (or past the next block) to get our next column. - * Example: - * <pre> - * | BLOCK 1 | BLOCK 2 | - * | r1/c1, r1/c2, r1/c3 | r1/c4, r1/c5, r2/c1 | - * ^ ^ - * | | - * Next Index Key SEEK_NEXT_ROW (before r2/c1) - * - * - * | BLOCK 1 | BLOCK 2 | - * | r1/c1/t5, r1/c1/t4, r1/c1/t3 | r1/c1/t2, r1/c1/T1, r1/c2/T3 | - * ^ ^ - * | | - * Next Index Key SEEK_NEXT_COL - * </pre> - * Now imagine we want columns c1 and c3 (see first diagram above), the 'Next Index Key' of r1/c4 - * is > r1/c3 so we should seek to get to the c1 on the next row, r2. In second case, say we only - * want one version of c1, after we have it, a SEEK_COL will be issued to get to c2. Looking at - * the 'Next Index Key', it would land us in the next block, so we should SEEK. In other scenarios - * where the SEEK will not land us in the next block, it is very likely better to issues a series - * of SKIPs. + /* + * See if we should actually SEEK or rather just SKIP to the next Cell. + * (see HBASE-13109) */ - @VisibleForTesting - protected ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) { + private ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) { switch(qcode) { case INCLUDE_AND_SEEK_NEXT_COL: case SEEK_NEXT_COL: { Cell nextIndexedKey = getNextIndexedKey(); if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY - && matcher.compareKeyForNextColumn(nextIndexedKey, cell) > 0) { + && matcher.compareKeyForNextColumn(nextIndexedKey, cell) >= 0) { return qcode == MatchCode.SEEK_NEXT_COL ? MatchCode.SKIP : MatchCode.INCLUDE; } break; @@ -723,16 +668,10 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner case INCLUDE_AND_SEEK_NEXT_ROW: case SEEK_NEXT_ROW: { - // If it is a Get Scan, then we know that we are done with this row; there are no more - // rows beyond the current one: don't try to optimize. We are DONE. Return the *_NEXT_ROW - // qcode as is. When the caller gets these flags on a Get Scan, it knows it can shut down the - // Scan. - if (!this.scan.isGetScan()) { - Cell nextIndexedKey = getNextIndexedKey(); - if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY - && matcher.compareKeyForNextRow(nextIndexedKey, cell) > 0) { - return qcode == MatchCode.SEEK_NEXT_ROW ? MatchCode.SKIP : MatchCode.INCLUDE; - } + Cell nextIndexedKey = getNextIndexedKey(); + if (nextIndexedKey != null && nextIndexedKey != KeyValueScanner.NO_NEXT_INDEXED_KEY + && matcher.compareKeyForNextRow(nextIndexedKey, cell) >= 0) { + return qcode == MatchCode.SEEK_NEXT_ROW ? MatchCode.SKIP : MatchCode.INCLUDE; } break; } @@ -870,10 +809,10 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner // check the var without any lock. Suppose even if we see the old // value here still it is ok to continue because we will not be resetting // the heap but will continue with the referenced memstore's snapshot. For compactions - // any way we don't need the updateReaders at all to happen as we still continue with + // any way we don't need the updateReaders at all to happen as we still continue with // the older files if (flushed) { - // If there is a flush and the current scan is notified on the flush ensure that the + // If there is a flush and the current scan is notified on the flush ensure that the // scan's heap gets reset and we do a seek on the newly flushed file. if(!this.closing) { this.lastTop = this.peek(); @@ -903,7 +842,7 @@ public class StoreScanner extends NonReversedNonLazyKeyValueScanner if (scanners.isEmpty()) return; int storeFileScannerCount = scanners.size(); CountDownLatch latch = new CountDownLatch(storeFileScannerCount); - List<ParallelSeekHandler> handlers = + List<ParallelSeekHandler> handlers = new ArrayList<ParallelSeekHandler>(storeFileScannerCount); for (KeyValueScanner scanner : scanners) { if (scanner instanceof StoreFileScanner) { http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java index 4720880..9fc068f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/CollectionBackedScanner.java @@ -30,7 +30,8 @@ import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.regionserver.NonReversedNonLazyKeyValueScanner; /** - * Utility scanner that wraps a sortable collection and serves as a KeyValueScanner. + * Utility scanner that wraps a sortable collection and serves + * as a KeyValueScanner. */ @InterfaceAudience.Private public class CollectionBackedScanner extends NonReversedNonLazyKeyValueScanner { http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java index a4e7f9b..3f87a00 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/KeyValueScanFixture.java @@ -22,7 +22,6 @@ package org.apache.hadoop.hbase.regionserver; import java.util.ArrayList; import java.util.List; -import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.util.CollectionBackedScanner; @@ -34,8 +33,9 @@ import org.apache.hadoop.hbase.util.CollectionBackedScanner; * to be a store file scanner. */ public class KeyValueScanFixture extends CollectionBackedScanner { - public KeyValueScanFixture(CellComparator comparator, Cell... cells) { - super(comparator, cells); + public KeyValueScanFixture(CellComparator comparator, + KeyValue... incData) { + super(comparator, incData); } public static List<KeyValueScanner> scanFixture(KeyValue[] ... kvArrays) { @@ -45,4 +45,4 @@ public class KeyValueScanFixture extends CollectionBackedScanner { } return scanners; } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java index 0e96682..a8c2c65 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestKeyValueScanFixture.java @@ -46,7 +46,8 @@ public class TestKeyValueScanFixture extends TestCase { KeyValueTestUtil.create("RowB", "family", "qf1", 10, KeyValue.Type.Put, "value-10") }; - KeyValueScanner scan = new KeyValueScanFixture(CellComparator.COMPARATOR, kvs); + KeyValueScanner scan = new KeyValueScanFixture( + CellComparator.COMPARATOR, kvs); KeyValue kv = KeyValueUtil.createFirstOnRow(Bytes.toBytes("RowA")); // should seek to this: http://git-wip-us.apache.org/repos/asf/hbase/blob/a13d6e00/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java index 4c594b0..92c85aa 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreScanner.java @@ -20,7 +20,6 @@ package org.apache.hadoop.hbase.regionserver; import static org.apache.hadoop.hbase.regionserver.KeyValueScanFixture.scanFixture; -import static org.junit.Assert.*; import java.io.IOException; import java.util.ArrayList; @@ -28,21 +27,16 @@ import java.util.Arrays; import java.util.List; import java.util.NavigableSet; import java.util.TreeSet; -import java.util.concurrent.atomic.AtomicInteger; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CategoryBasedTimeout; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellComparator; -import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeepDeletedCells; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.KeyValueTestUtil; -import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.RegionServerTests; @@ -59,113 +53,16 @@ import org.junit.rules.TestRule; // Can't be small as it plays with EnvironmentEdgeManager @Category({RegionServerTests.class, MediumTests.class}) public class TestStoreScanner { - private static final Log LOG = LogFactory.getLog(TestStoreScanner.class); @Rule public TestName name = new TestName(); @Rule public final TestRule timeout = CategoryBasedTimeout.builder().withTimeout(this.getClass()). withLookingForStuckThread(true).build(); private static final String CF_STR = "cf"; - private static final byte [] CF = Bytes.toBytes(CF_STR); + final byte [] CF = Bytes.toBytes(CF_STR); static Configuration CONF = HBaseConfiguration.create(); private ScanInfo scanInfo = new ScanInfo(CONF, CF, 0, Integer.MAX_VALUE, Long.MAX_VALUE, KeepDeletedCells.FALSE, 0, CellComparator.COMPARATOR); private ScanType scanType = ScanType.USER_SCAN; - /** - * From here on down, we have a bunch of defines and specific CELL_GRID of Cells. The - * CELL_GRID then has a Scanner that can fake out 'block' transitions. All this elaborate - * setup is for tests that ensure we don't overread, and that the - * {@link StoreScanner#optimize(org.apache.hadoop.hbase.regionserver.ScanQueryMatcher.MatchCode, - * Cell)} is not overly enthusiastic. - */ - private static final byte [] ZERO = new byte [] {'0'}; - private static final byte [] ZERO_POINT_ZERO = new byte [] {'0', '.', '0'}; - private static final byte [] ONE = new byte [] {'1'}; - private static final byte [] TWO = new byte [] {'2'}; - private static final byte [] TWO_POINT_TWO = new byte [] {'2', '.', '2'}; - private static final byte [] THREE = new byte [] {'3'}; - private static final byte [] FOUR = new byte [] {'4'}; - private static final byte [] FIVE = new byte [] {'5'}; - private static final byte [] VALUE = new byte [] {'v'}; - private static final int CELL_GRID_BLOCK2_BOUNDARY = 4; - private static final int CELL_GRID_BLOCK3_BOUNDARY = 11; - private static final int CELL_GRID_BLOCK4_BOUNDARY = 15; - private static final int CELL_GRID_BLOCK5_BOUNDARY = 19; - - /** - * Five rows by four columns distinguished by column qualifier (column qualifier is one of the - * four rows... ONE, TWO, etc.). Exceptions are a weird row after TWO; it is TWO_POINT_TWO. - * And then row FOUR has five columns finishing w/ row FIVE having a single column. - * We will use this to test scan does the right thing as it - * we do Gets, StoreScanner#optimize, and what we do on (faked) block boundaries. - */ - private static final Cell [] CELL_GRID = new Cell [] { - CellUtil.createCell(ONE, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(ONE, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(ONE, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(ONE, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE), - // Offset 4 CELL_GRID_BLOCK2_BOUNDARY - CellUtil.createCell(TWO, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(TWO, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(TWO, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(TWO, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(TWO_POINT_TWO, CF, ZERO, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(TWO_POINT_TWO, CF, ZERO_POINT_ZERO, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(TWO_POINT_TWO, CF, FIVE, 1L, KeyValue.Type.Put.getCode(), VALUE), - // Offset 11! CELL_GRID_BLOCK3_BOUNDARY - CellUtil.createCell(THREE, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(THREE, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(THREE, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(THREE, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE), - // Offset 15 CELL_GRID_BLOCK4_BOUNDARY - CellUtil.createCell(FOUR, CF, ONE, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(FOUR, CF, TWO, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(FOUR, CF, THREE, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(FOUR, CF, FOUR, 1L, KeyValue.Type.Put.getCode(), VALUE), - // Offset 19 CELL_GRID_BLOCK5_BOUNDARY - CellUtil.createCell(FOUR, CF, FIVE, 1L, KeyValue.Type.Put.getCode(), VALUE), - CellUtil.createCell(FIVE, CF, ZERO, 1L, KeyValue.Type.Put.getCode(), VALUE), - }; - - /** - * A StoreScanner for our CELL_GRID above. Fakes the block transitions. Does counts of - * calls to optimize and counts of when optimize actually did an optimize. - */ - private static class CellGridStoreScanner extends StoreScanner { - // Count of how often optimize is called and of how often it does an optimize. - final AtomicInteger count = new AtomicInteger(0); - final AtomicInteger optimization = new AtomicInteger(0); - - CellGridStoreScanner(final Scan scan, ScanInfo scanInfo, ScanType scanType) - throws IOException { - super(scan, scanInfo, scanType, scan.getFamilyMap().get(CF), - Arrays.<KeyValueScanner>asList( - new KeyValueScanner[] {new KeyValueScanFixture(CellComparator.COMPARATOR, CELL_GRID)})); - } - - protected ScanQueryMatcher.MatchCode optimize(ScanQueryMatcher.MatchCode qcode, Cell cell) { - count.incrementAndGet(); - ScanQueryMatcher.MatchCode after = super.optimize(qcode, cell); - LOG.info("Cell=" + cell + ", nextIndex=" + CellUtil.toString(getNextIndexedKey(), false) + - ", before=" + qcode + ", after=" + after); - if (qcode != after) { - optimization.incrementAndGet(); - } - return after; - }; - - @Override - public Cell getNextIndexedKey() { - // Fake block boundaries by having index of next block change as we go through scan. - return count.get() > CELL_GRID_BLOCK4_BOUNDARY? - CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK5_BOUNDARY]): - count.get() > CELL_GRID_BLOCK3_BOUNDARY? - CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK4_BOUNDARY]): - count.get() > CELL_GRID_BLOCK2_BOUNDARY? - CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK3_BOUNDARY]): - CellUtil.createFirstOnRow(CELL_GRID[CELL_GRID_BLOCK2_BOUNDARY]); - } - }; - /* * Test utility for building a NavigableSet for scanners. * @param strCols @@ -181,145 +78,6 @@ public class TestStoreScanner { } @Test - public void testFullRowGetDoesNotOverreadWhenRowInsideOneBlock() throws IOException { - // Do a Get against row two. Row two is inside a block that starts with row TWO but ends with - // row TWO_POINT_TWO. We should read one block only. - Get get = new Get(TWO); - Scan scan = new Scan(get); - CellGridStoreScanner scanner = new CellGridStoreScanner(scan, this.scanInfo, this.scanType); - try { - List<Cell> results = new ArrayList<Cell>(); - while (scanner.next(results)) { - continue; - } - // Should be four results of column 1 (though there are 5 rows in the CELL_GRID -- the - // TWO_POINT_TWO row does not have a a column ONE. - Assert.assertEquals(4, results.size()); - // We should have gone the optimize route 5 times totally... an INCLUDE for the four cells - // in the row plus the DONE on the end. - Assert.assertEquals(5, scanner.count.get()); - // For a full row Get, there should be no opportunity for scanner optimization. - Assert.assertEquals(0, scanner.optimization.get()); - } finally { - scanner.close(); - } - } - - @Test - public void testFullRowSpansBlocks() throws IOException { - // Do a Get against row FOUR. It spans two blocks. - Get get = new Get(FOUR); - Scan scan = new Scan(get); - CellGridStoreScanner scanner = new CellGridStoreScanner(scan, this.scanInfo, this.scanType); - try { - List<Cell> results = new ArrayList<Cell>(); - while (scanner.next(results)) { - continue; - } - // Should be four results of column 1 (though there are 5 rows in the CELL_GRID -- the - // TWO_POINT_TWO row does not have a a column ONE. - Assert.assertEquals(5, results.size()); - // We should have gone the optimize route 6 times totally... an INCLUDE for the five cells - // in the row plus the DONE on the end. - Assert.assertEquals(6, scanner.count.get()); - // For a full row Get, there should be no opportunity for scanner optimization. - Assert.assertEquals(0, scanner.optimization.get()); - } finally { - scanner.close(); - } - } - - /** - * Test optimize in StoreScanner. Test that we skip to the next 'block' when we it makes sense - * reading the block 'index'. - * @throws IOException - */ - @Test - public void testOptimize() throws IOException { - Scan scan = new Scan(); - // A scan that just gets the first qualifier on each row of the CELL_GRID - scan.addColumn(CF, ONE); - CellGridStoreScanner scanner = new CellGridStoreScanner(scan, this.scanInfo, this.scanType); - try { - List<Cell> results = new ArrayList<Cell>(); - while (scanner.next(results)) { - continue; - } - // Should be four results of column 1 (though there are 5 rows in the CELL_GRID -- the - // TWO_POINT_TWO row does not have a a column ONE. - Assert.assertEquals(4, results.size()); - for (Cell cell: results) { - assertTrue(Bytes.equals(ONE, 0, ONE.length, - cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength())); - } - Assert.assertTrue("Optimize should do some optimizations", scanner.optimization.get() > 0); - } finally { - scanner.close(); - } - } - - /** - * Ensure the optimize Scan method in StoreScanner does not get in the way of a Get doing minimum - * work... seeking to start of block and then SKIPPING until we find the wanted Cell. - * This 'simple' scenario mimics case of all Cells fitting inside a single HFileBlock. - * See HBASE-15392. This test is a little cryptic. Takes a bit of staring to figure what it up to. - * @throws IOException - */ - @Test - public void testOptimizeAndGet() throws IOException { - // First test a Get of two columns in the row R2. Every Get is a Scan. Get columns named - // R2 and R3. - Get get = new Get(TWO); - get.addColumn(CF, TWO); - get.addColumn(CF, THREE); - Scan scan = new Scan(get); - CellGridStoreScanner scanner = new CellGridStoreScanner(scan, this.scanInfo, this.scanType); - try { - List<Cell> results = new ArrayList<Cell>(); - // For a Get there should be no more next's after the first call. - Assert.assertEquals(false, scanner.next(results)); - // Should be one result only. - Assert.assertEquals(2, results.size()); - // And we should have gone through optimize twice only. - Assert.assertEquals("First qcode is SEEK_NEXT_COL and second INCLUDE_AND_SEEK_NEXT_ROW", - 3, scanner.count.get()); - } finally { - scanner.close(); - } - } - - /** - * Ensure that optimize does not cause the Get to do more seeking than required. Optimize - * (see HBASE-15392) was causing us to seek all Cells in a block when a Get Scan if the next block - * index/start key was a different row to the current one. A bug. We'd call next too often - * because we had to exhaust all Cells in the current row making us load the next block just to - * discard what we read there. This test is a little cryptic. Takes a bit of staring to figure - * what it up to. - * @throws IOException - */ - @Test - public void testOptimizeAndGetWithFakedNextBlockIndexStart() throws IOException { - // First test a Get of second column in the row R2. Every Get is a Scan. Second column has a - // qualifier of R2. - Get get = new Get(THREE); - get.addColumn(CF, TWO); - Scan scan = new Scan(get); - CellGridStoreScanner scanner = new CellGridStoreScanner(scan, this.scanInfo, this.scanType); - try { - List<Cell> results = new ArrayList<Cell>(); - // For a Get there should be no more next's after the first call. - Assert.assertEquals(false, scanner.next(results)); - // Should be one result only. - Assert.assertEquals(1, results.size()); - // And we should have gone through optimize twice only. - Assert.assertEquals("First qcode is SEEK_NEXT_COL and second INCLUDE_AND_SEEK_NEXT_ROW", - 2, scanner.count.get()); - } finally { - scanner.close(); - } - } - - @Test public void testScanTimeRange() throws IOException { String r1 = "R1"; // returns only 1 of these 2 even though same timestamp
