Author: larsh Date: Tue Jan 10 23:39:57 2012 New Revision: 1229806 URL: http://svn.apache.org/viewvc?rev=1229806&view=rev Log: HBASE-5121 MajorCompaction may affect scan's correctness (chunhui shen and Lars H)
Modified: hbase/trunk/CHANGES.txt hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanner.java Modified: hbase/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1229806&r1=1229805&r2=1229806&view=diff ============================================================================== --- hbase/trunk/CHANGES.txt (original) +++ hbase/trunk/CHANGES.txt Tue Jan 10 23:39:57 2012 @@ -476,6 +476,7 @@ Release 0.92.0 - Unreleased HBASE-5152 Region is on service before completing initialization when doing rollback of split, it will affect read correctness (Chunhui) HBASE-5137 MasterFileSystem.splitLog() should abort even if waitOnSafeMode() throws IOException(Ted) + HBASE-5121 MajorCompaction may affect scan's correctness (chunhui shen and Lars H) TESTS HBASE-4450 test for number of blocks read: to serve as baseline for expected Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java?rev=1229806&r1=1229805&r2=1229806&view=diff ============================================================================== --- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java (original) +++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/KeyValueHeap.java Tue Jan 10 23:39:57 2012 @@ -20,14 +20,14 @@ package org.apache.hadoop.hbase.regionserver; -import org.apache.hadoop.hbase.KeyValue; -import org.apache.hadoop.hbase.KeyValue.KVComparator; - import java.io.IOException; import java.util.Comparator; import java.util.List; import java.util.PriorityQueue; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.KeyValue.KVComparator; + /** * Implements a heap merge across any number of KeyValueScanners. * <p> @@ -124,7 +124,7 @@ public class KeyValueHeap extends NonLaz return false; } InternalScanner currentAsInternal = (InternalScanner)this.current; - boolean mayContainsMoreRows = currentAsInternal.next(result, limit); + boolean mayContainMoreRows = currentAsInternal.next(result, limit); KeyValue pee = this.current.peek(); /* * By definition, any InternalScanner must return false only when it has no @@ -133,7 +133,7 @@ public class KeyValueHeap extends NonLaz * more efficient to close scanners which are not needed than keep them in * the heap. This is also required for certain optimizations. */ - if (pee == null || !mayContainsMoreRows) { + if (pee == null || !mayContainMoreRows) { this.current.close(); } else { this.heap.add(this.current); Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java?rev=1229806&r1=1229805&r2=1229806&view=diff ============================================================================== --- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (original) +++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java Tue Jan 10 23:39:57 2012 @@ -298,7 +298,9 @@ class StoreScanner extends NonLazyKeyVal @Override public synchronized boolean next(List<KeyValue> outResult, int limit) throws IOException { - checkReseek(); + if (checkReseek()) { + return true; + } // if the heap was left null, then the scanners had previously run out anyways, close and // return. @@ -448,12 +450,25 @@ class StoreScanner extends NonLazyKeyVal // Let the next() call handle re-creating and seeking } - private void checkReseek() throws IOException { + /** + * @return true if top of heap has changed (and KeyValueHeap has to try the + * next KV) + * @throws IOException + */ + private boolean checkReseek() throws IOException { if (this.heap == null && this.lastTop != null) { resetScannerStack(this.lastTop); + if (this.heap.peek() == null + || store.comparator.compare(this.lastTop, this.heap.peek()) != 0) { + LOG.debug("Storescanner.peek() is changed where before = " + + this.lastTop.toString() + ",and after = " + this.heap.peek()); + this.lastTop = null; + return true; + } this.lastTop = null; // gone! } // else dont need to reseek + return false; } private void resetScannerStack(KeyValue lastTopKey) throws IOException { Modified: hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanner.java URL: http://svn.apache.org/viewvc/hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanner.java?rev=1229806&r1=1229805&r2=1229806&view=diff ============================================================================== --- hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanner.java (original) +++ hbase/trunk/src/test/java/org/apache/hadoop/hbase/regionserver/TestScanner.java Tue Jan 10 23:39:57 2012 @@ -27,7 +27,15 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.*; +import org.apache.hadoop.hbase.HBaseTestCase; +import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.HTableDescriptor; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.SmallTests; +import org.apache.hadoop.hbase.UnknownScannerException; +import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; @@ -39,7 +47,6 @@ import org.apache.hadoop.hbase.filter.Wh import org.apache.hadoop.hbase.io.hfile.Compression; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Writables; -import org.apache.hadoop.hdfs.MiniDFSCluster; import org.junit.experimental.categories.Category; /** @@ -77,6 +84,23 @@ public class TestScanner extends HBaseTe private HRegion r; private HRegionIncommon region; + + private byte[] firstRowBytes, secondRowBytes, thirdRowBytes; + final private byte[] col1, col2; + + public TestScanner() throws Exception { + super(); + + firstRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING); + secondRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING); + // Increment the least significant character so we get to next row. + secondRowBytes[START_KEY_BYTES.length - 1]++; + thirdRowBytes = START_KEY.getBytes(HConstants.UTF8_ENCODING); + thirdRowBytes[START_KEY_BYTES.length - 1]++; + thirdRowBytes[START_KEY_BYTES.length - 1]++; + col1 = "column1".getBytes(HConstants.UTF8_ENCODING); + col2 = "column2".getBytes(HConstants.UTF8_ENCODING); + } /** * Test basic stop row filter works. @@ -466,6 +490,68 @@ public class TestScanner extends HBaseTe } } + /** + * Make sure scanner returns correct result when we run a major compaction + * with deletes. + * + * @throws Exception + */ + @SuppressWarnings("deprecation") + public void testScanAndConcurrentMajorCompact() throws Exception { + HTableDescriptor htd = createTableDescriptor(getName()); + this.r = createNewHRegion(htd, null, null); + HRegionIncommon hri = new HRegionIncommon(r); + + try { + addContent(hri, Bytes.toString(fam1), Bytes.toString(col1), + firstRowBytes, secondRowBytes); + addContent(hri, Bytes.toString(fam2), Bytes.toString(col1), + firstRowBytes, secondRowBytes); + + Delete dc = new Delete(firstRowBytes); + /* delete column1 of firstRow */ + dc.deleteColumns(fam1, col1); + r.delete(dc, null, true); + r.flushcache(); + + addContent(hri, Bytes.toString(fam1), Bytes.toString(col1), + secondRowBytes, thirdRowBytes); + addContent(hri, Bytes.toString(fam2), Bytes.toString(col1), + secondRowBytes, thirdRowBytes); + r.flushcache(); + + InternalScanner s = r.getScanner(new Scan()); + // run a major compact, column1 of firstRow will be cleaned. + r.compactStores(true); + + List<KeyValue> results = new ArrayList<KeyValue>(); + s.next(results); + + // make sure returns column2 of firstRow + assertTrue("result is not correct, keyValues : " + results, + results.size() == 1); + assertTrue(Bytes.BYTES_COMPARATOR.compare(firstRowBytes, results.get(0) + .getRow()) == 0); + assertTrue(Bytes.BYTES_COMPARATOR.compare(fam2, results.get(0) + .getFamily()) == 0); + + results = new ArrayList<KeyValue>(); + s.next(results); + + // get secondRow + assertTrue(results.size() == 2); + assertTrue(Bytes.BYTES_COMPARATOR.compare(secondRowBytes, results.get(0) + .getRow()) == 0); + assertTrue(Bytes.BYTES_COMPARATOR.compare(fam1, results.get(0) + .getFamily()) == 0); + assertTrue(Bytes.BYTES_COMPARATOR.compare(fam2, results.get(1) + .getFamily()) == 0); + } finally { + this.r.close(); + this.r.getLog().closeAndDelete(); + } + } + /* * @param hri Region