Repository: hbase-site Updated Branches: refs/heads/asf-site f1997f363 -> 836bc028d
http://git-wip-us.apache.org/repos/asf/hbase-site/blob/836bc028/testdevapidocs/src-html/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.html ---------------------------------------------------------------------- diff --git a/testdevapidocs/src-html/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.html b/testdevapidocs/src-html/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.html index 5f7f1cc..1890d8b 100644 --- a/testdevapidocs/src-html/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.html +++ b/testdevapidocs/src-html/org/apache/hadoop/hbase/regionserver/TestCompoundBloomFilter.html @@ -233,155 +233,157 @@ <span class="sourceLineNo">225</span> // Test for false positives (some percentage allowed). We test in two modes:<a name="line.225"></a> <span class="sourceLineNo">226</span> // "fake lookup" which ignores the key distribution, and production mode.<a name="line.226"></a> <span class="sourceLineNo">227</span> for (boolean fakeLookupEnabled : new boolean[] { true, false }) {<a name="line.227"></a> -<span class="sourceLineNo">228</span> BloomFilterUtil.setFakeLookupMode(fakeLookupEnabled);<a name="line.228"></a> -<span class="sourceLineNo">229</span> try {<a name="line.229"></a> -<span class="sourceLineNo">230</span> String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?<a name="line.230"></a> -<span class="sourceLineNo">231</span> "enabled" : "disabled");<a name="line.231"></a> -<span class="sourceLineNo">232</span> CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();<a name="line.232"></a> -<span class="sourceLineNo">233</span> cbf.enableTestingStats();<a name="line.233"></a> -<span class="sourceLineNo">234</span> int numFalsePos = 0;<a name="line.234"></a> -<span class="sourceLineNo">235</span> Random rand = new Random(EVALUATION_SEED);<a name="line.235"></a> -<span class="sourceLineNo">236</span> int nTrials = NUM_KV[t] * 10;<a name="line.236"></a> -<span class="sourceLineNo">237</span> for (int i = 0; i < nTrials; ++i) {<a name="line.237"></a> -<span class="sourceLineNo">238</span> byte[] query = RandomKeyValueUtil.randomRowOrQualifier(rand);<a name="line.238"></a> -<span class="sourceLineNo">239</span> if (isInBloom(scanner, query, bt, rand)) {<a name="line.239"></a> -<span class="sourceLineNo">240</span> numFalsePos += 1;<a name="line.240"></a> -<span class="sourceLineNo">241</span> }<a name="line.241"></a> -<span class="sourceLineNo">242</span> }<a name="line.242"></a> -<span class="sourceLineNo">243</span> double falsePosRate = numFalsePos * 1.0 / nTrials;<a name="line.243"></a> -<span class="sourceLineNo">244</span> LOG.debug(String.format(testIdMsg<a name="line.244"></a> -<span class="sourceLineNo">245</span> + " False positives: %d out of %d (%f)",<a name="line.245"></a> -<span class="sourceLineNo">246</span> numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);<a name="line.246"></a> -<span class="sourceLineNo">247</span><a name="line.247"></a> -<span class="sourceLineNo">248</span> // Check for obvious Bloom filter crashes.<a name="line.248"></a> -<span class="sourceLineNo">249</span> assertTrue("False positive is too high: " + falsePosRate + " (greater "<a name="line.249"></a> -<span class="sourceLineNo">250</span> + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,<a name="line.250"></a> -<span class="sourceLineNo">251</span> falsePosRate < TOO_HIGH_ERROR_RATE);<a name="line.251"></a> -<span class="sourceLineNo">252</span><a name="line.252"></a> -<span class="sourceLineNo">253</span> // Now a more precise check to see if the false positive rate is not<a name="line.253"></a> -<span class="sourceLineNo">254</span> // too high. The reason we use a relaxed restriction for the real-world<a name="line.254"></a> -<span class="sourceLineNo">255</span> // case as opposed to the "fake lookup" case is that our hash functions<a name="line.255"></a> -<span class="sourceLineNo">256</span> // are not completely independent.<a name="line.256"></a> -<span class="sourceLineNo">257</span><a name="line.257"></a> -<span class="sourceLineNo">258</span> double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;<a name="line.258"></a> -<span class="sourceLineNo">259</span> validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,<a name="line.259"></a> -<span class="sourceLineNo">260</span> fakeLookupModeStr);<a name="line.260"></a> -<span class="sourceLineNo">261</span><a name="line.261"></a> -<span class="sourceLineNo">262</span> // For checking the lower bound we need to eliminate the last chunk,<a name="line.262"></a> -<span class="sourceLineNo">263</span> // because it is frequently smaller and the false positive rate in it<a name="line.263"></a> -<span class="sourceLineNo">264</span> // is too low. This does not help if there is only one under-sized<a name="line.264"></a> -<span class="sourceLineNo">265</span> // chunk, though.<a name="line.265"></a> -<span class="sourceLineNo">266</span> int nChunks = cbf.getNumChunks();<a name="line.266"></a> -<span class="sourceLineNo">267</span> if (nChunks > 1) {<a name="line.267"></a> -<span class="sourceLineNo">268</span> numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);<a name="line.268"></a> -<span class="sourceLineNo">269</span> nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);<a name="line.269"></a> -<span class="sourceLineNo">270</span> falsePosRate = numFalsePos * 1.0 / nTrials;<a name="line.270"></a> -<span class="sourceLineNo">271</span> LOG.info(testIdMsg + " False positive rate without last chunk is " +<a name="line.271"></a> -<span class="sourceLineNo">272</span> falsePosRate + fakeLookupModeStr);<a name="line.272"></a> -<span class="sourceLineNo">273</span> }<a name="line.273"></a> -<span class="sourceLineNo">274</span><a name="line.274"></a> -<span class="sourceLineNo">275</span> validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,<a name="line.275"></a> -<span class="sourceLineNo">276</span> fakeLookupModeStr);<a name="line.276"></a> -<span class="sourceLineNo">277</span> } finally {<a name="line.277"></a> -<span class="sourceLineNo">278</span> BloomFilterUtil.setFakeLookupMode(false);<a name="line.278"></a> -<span class="sourceLineNo">279</span> }<a name="line.279"></a> -<span class="sourceLineNo">280</span> }<a name="line.280"></a> -<span class="sourceLineNo">281</span><a name="line.281"></a> -<span class="sourceLineNo">282</span> r.close(true); // end of test so evictOnClose<a name="line.282"></a> -<span class="sourceLineNo">283</span> }<a name="line.283"></a> -<span class="sourceLineNo">284</span><a name="line.284"></a> -<span class="sourceLineNo">285</span> private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,<a name="line.285"></a> -<span class="sourceLineNo">286</span> Random rand) {<a name="line.286"></a> -<span class="sourceLineNo">287</span> return isInBloom(scanner, row, RandomKeyValueUtil.randomRowOrQualifier(rand));<a name="line.287"></a> -<span class="sourceLineNo">288</span> }<a name="line.288"></a> -<span class="sourceLineNo">289</span><a name="line.289"></a> -<span class="sourceLineNo">290</span> private boolean isInBloom(StoreFileScanner scanner, byte[] row,<a name="line.290"></a> -<span class="sourceLineNo">291</span> byte[] qualifier) {<a name="line.291"></a> -<span class="sourceLineNo">292</span> Scan scan = new Scan().withStartRow(row).withStopRow(row, true);<a name="line.292"></a> -<span class="sourceLineNo">293</span> scan.addColumn(Bytes.toBytes(RandomKeyValueUtil.COLUMN_FAMILY_NAME), qualifier);<a name="line.293"></a> -<span class="sourceLineNo">294</span> HStore store = mock(HStore.class);<a name="line.294"></a> -<span class="sourceLineNo">295</span> when(store.getColumnFamilyDescriptor())<a name="line.295"></a> -<span class="sourceLineNo">296</span> .thenReturn(ColumnFamilyDescriptorBuilder.of(RandomKeyValueUtil.COLUMN_FAMILY_NAME));<a name="line.296"></a> -<span class="sourceLineNo">297</span> return scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);<a name="line.297"></a> -<span class="sourceLineNo">298</span> }<a name="line.298"></a> -<span class="sourceLineNo">299</span><a name="line.299"></a> -<span class="sourceLineNo">300</span> private Path writeStoreFile(int t, BloomType bt, List<KeyValue> kvs)<a name="line.300"></a> -<span class="sourceLineNo">301</span> throws IOException {<a name="line.301"></a> -<span class="sourceLineNo">302</span> conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,<a name="line.302"></a> -<span class="sourceLineNo">303</span> BLOOM_BLOCK_SIZES[t]);<a name="line.303"></a> -<span class="sourceLineNo">304</span> conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);<a name="line.304"></a> -<span class="sourceLineNo">305</span> cacheConf = new CacheConfig(conf);<a name="line.305"></a> -<span class="sourceLineNo">306</span> HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCK_SIZES[t]).build();<a name="line.306"></a> -<span class="sourceLineNo">307</span> StoreFileWriter w = new StoreFileWriter.Builder(conf, cacheConf, fs)<a name="line.307"></a> -<span class="sourceLineNo">308</span> .withOutputDir(TEST_UTIL.getDataTestDir())<a name="line.308"></a> -<span class="sourceLineNo">309</span> .withBloomType(bt)<a name="line.309"></a> -<span class="sourceLineNo">310</span> .withFileContext(meta)<a name="line.310"></a> -<span class="sourceLineNo">311</span> .build();<a name="line.311"></a> -<span class="sourceLineNo">312</span><a name="line.312"></a> -<span class="sourceLineNo">313</span> assertTrue(w.hasGeneralBloom());<a name="line.313"></a> -<span class="sourceLineNo">314</span> assertTrue(w.getGeneralBloomWriter() instanceof CompoundBloomFilterWriter);<a name="line.314"></a> -<span class="sourceLineNo">315</span> CompoundBloomFilterWriter cbbf =<a name="line.315"></a> -<span class="sourceLineNo">316</span> (CompoundBloomFilterWriter) w.getGeneralBloomWriter();<a name="line.316"></a> -<span class="sourceLineNo">317</span><a name="line.317"></a> -<span class="sourceLineNo">318</span> int keyCount = 0;<a name="line.318"></a> -<span class="sourceLineNo">319</span> KeyValue prev = null;<a name="line.319"></a> -<span class="sourceLineNo">320</span> LOG.debug("Total keys/values to insert: " + kvs.size());<a name="line.320"></a> -<span class="sourceLineNo">321</span> for (KeyValue kv : kvs) {<a name="line.321"></a> -<span class="sourceLineNo">322</span> w.append(kv);<a name="line.322"></a> -<span class="sourceLineNo">323</span><a name="line.323"></a> -<span class="sourceLineNo">324</span> // Validate the key count in the Bloom filter.<a name="line.324"></a> -<span class="sourceLineNo">325</span> boolean newKey = true;<a name="line.325"></a> -<span class="sourceLineNo">326</span> if (prev != null) {<a name="line.326"></a> -<span class="sourceLineNo">327</span> newKey = !(bt == BloomType.ROW ? CellUtil.matchingRows(kv,<a name="line.327"></a> -<span class="sourceLineNo">328</span> prev) : CellUtil.matchingRowColumn(kv, prev));<a name="line.328"></a> -<span class="sourceLineNo">329</span> }<a name="line.329"></a> -<span class="sourceLineNo">330</span> if (newKey)<a name="line.330"></a> -<span class="sourceLineNo">331</span> ++keyCount;<a name="line.331"></a> -<span class="sourceLineNo">332</span> assertEquals(keyCount, cbbf.getKeyCount());<a name="line.332"></a> -<span class="sourceLineNo">333</span><a name="line.333"></a> -<span class="sourceLineNo">334</span> prev = kv;<a name="line.334"></a> -<span class="sourceLineNo">335</span> }<a name="line.335"></a> -<span class="sourceLineNo">336</span> w.close();<a name="line.336"></a> -<span class="sourceLineNo">337</span><a name="line.337"></a> -<span class="sourceLineNo">338</span> return w.getPath();<a name="line.338"></a> -<span class="sourceLineNo">339</span> }<a name="line.339"></a> -<span class="sourceLineNo">340</span><a name="line.340"></a> -<span class="sourceLineNo">341</span> @Test<a name="line.341"></a> -<span class="sourceLineNo">342</span> public void testCompoundBloomSizing() {<a name="line.342"></a> -<span class="sourceLineNo">343</span> int bloomBlockByteSize = 4096;<a name="line.343"></a> -<span class="sourceLineNo">344</span> int bloomBlockBitSize = bloomBlockByteSize * 8;<a name="line.344"></a> -<span class="sourceLineNo">345</span> double targetErrorRate = 0.01;<a name="line.345"></a> -<span class="sourceLineNo">346</span> long maxKeysPerChunk = BloomFilterUtil.idealMaxKeys(bloomBlockBitSize,<a name="line.346"></a> -<span class="sourceLineNo">347</span> targetErrorRate);<a name="line.347"></a> -<span class="sourceLineNo">348</span><a name="line.348"></a> -<span class="sourceLineNo">349</span> long bloomSize1 = bloomBlockByteSize * 8;<a name="line.349"></a> -<span class="sourceLineNo">350</span> long bloomSize2 = BloomFilterUtil.computeBitSize(maxKeysPerChunk,<a name="line.350"></a> -<span class="sourceLineNo">351</span> targetErrorRate);<a name="line.351"></a> -<span class="sourceLineNo">352</span><a name="line.352"></a> -<span class="sourceLineNo">353</span> double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);<a name="line.353"></a> -<span class="sourceLineNo">354</span> assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);<a name="line.354"></a> -<span class="sourceLineNo">355</span> }<a name="line.355"></a> -<span class="sourceLineNo">356</span><a name="line.356"></a> -<span class="sourceLineNo">357</span> @Test<a name="line.357"></a> -<span class="sourceLineNo">358</span> public void testCreateKey() {<a name="line.358"></a> -<span class="sourceLineNo">359</span> byte[] row = "myRow".getBytes();<a name="line.359"></a> -<span class="sourceLineNo">360</span> byte[] qualifier = "myQualifier".getBytes();<a name="line.360"></a> -<span class="sourceLineNo">361</span> // Mimic what Storefile.createBloomKeyValue() does<a name="line.361"></a> -<span class="sourceLineNo">362</span> byte[] rowKey = KeyValueUtil.createFirstOnRow(row, 0, row.length, new byte[0], 0, 0, row, 0, 0).getKey();<a name="line.362"></a> -<span class="sourceLineNo">363</span> byte[] rowColKey = KeyValueUtil.createFirstOnRow(row, 0, row.length,<a name="line.363"></a> -<span class="sourceLineNo">364</span> new byte[0], 0, 0, qualifier, 0, qualifier.length).getKey();<a name="line.364"></a> -<span class="sourceLineNo">365</span> KeyValue rowKV = KeyValueUtil.createKeyValueFromKey(rowKey);<a name="line.365"></a> -<span class="sourceLineNo">366</span> KeyValue rowColKV = KeyValueUtil.createKeyValueFromKey(rowColKey);<a name="line.366"></a> -<span class="sourceLineNo">367</span> assertEquals(rowKV.getTimestamp(), rowColKV.getTimestamp());<a name="line.367"></a> -<span class="sourceLineNo">368</span> assertEquals(Bytes.toStringBinary(rowKV.getRowArray(), rowKV.getRowOffset(),<a name="line.368"></a> -<span class="sourceLineNo">369</span> rowKV.getRowLength()), Bytes.toStringBinary(rowColKV.getRowArray(), rowColKV.getRowOffset(),<a name="line.369"></a> -<span class="sourceLineNo">370</span> rowColKV.getRowLength()));<a name="line.370"></a> -<span class="sourceLineNo">371</span> assertEquals(0, rowKV.getQualifierLength());<a name="line.371"></a> -<span class="sourceLineNo">372</span> }<a name="line.372"></a> -<span class="sourceLineNo">373</span><a name="line.373"></a> -<span class="sourceLineNo">374</span><a name="line.374"></a> -<span class="sourceLineNo">375</span>}<a name="line.375"></a> +<span class="sourceLineNo">228</span> if (fakeLookupEnabled) {<a name="line.228"></a> +<span class="sourceLineNo">229</span> BloomFilterUtil.setRandomGeneratorForTest(new Random(283742987L));<a name="line.229"></a> +<span class="sourceLineNo">230</span> }<a name="line.230"></a> +<span class="sourceLineNo">231</span> try {<a name="line.231"></a> +<span class="sourceLineNo">232</span> String fakeLookupModeStr = ", fake lookup is " + (fakeLookupEnabled ?<a name="line.232"></a> +<span class="sourceLineNo">233</span> "enabled" : "disabled");<a name="line.233"></a> +<span class="sourceLineNo">234</span> CompoundBloomFilter cbf = (CompoundBloomFilter) r.getGeneralBloomFilter();<a name="line.234"></a> +<span class="sourceLineNo">235</span> cbf.enableTestingStats();<a name="line.235"></a> +<span class="sourceLineNo">236</span> int numFalsePos = 0;<a name="line.236"></a> +<span class="sourceLineNo">237</span> Random rand = new Random(EVALUATION_SEED);<a name="line.237"></a> +<span class="sourceLineNo">238</span> int nTrials = NUM_KV[t] * 10;<a name="line.238"></a> +<span class="sourceLineNo">239</span> for (int i = 0; i < nTrials; ++i) {<a name="line.239"></a> +<span class="sourceLineNo">240</span> byte[] query = RandomKeyValueUtil.randomRowOrQualifier(rand);<a name="line.240"></a> +<span class="sourceLineNo">241</span> if (isInBloom(scanner, query, bt, rand)) {<a name="line.241"></a> +<span class="sourceLineNo">242</span> numFalsePos += 1;<a name="line.242"></a> +<span class="sourceLineNo">243</span> }<a name="line.243"></a> +<span class="sourceLineNo">244</span> }<a name="line.244"></a> +<span class="sourceLineNo">245</span> double falsePosRate = numFalsePos * 1.0 / nTrials;<a name="line.245"></a> +<span class="sourceLineNo">246</span> LOG.debug(String.format(testIdMsg<a name="line.246"></a> +<span class="sourceLineNo">247</span> + " False positives: %d out of %d (%f)",<a name="line.247"></a> +<span class="sourceLineNo">248</span> numFalsePos, nTrials, falsePosRate) + fakeLookupModeStr);<a name="line.248"></a> +<span class="sourceLineNo">249</span><a name="line.249"></a> +<span class="sourceLineNo">250</span> // Check for obvious Bloom filter crashes.<a name="line.250"></a> +<span class="sourceLineNo">251</span> assertTrue("False positive is too high: " + falsePosRate + " (greater "<a name="line.251"></a> +<span class="sourceLineNo">252</span> + "than " + TOO_HIGH_ERROR_RATE + ")" + fakeLookupModeStr,<a name="line.252"></a> +<span class="sourceLineNo">253</span> falsePosRate < TOO_HIGH_ERROR_RATE);<a name="line.253"></a> +<span class="sourceLineNo">254</span><a name="line.254"></a> +<span class="sourceLineNo">255</span> // Now a more precise check to see if the false positive rate is not<a name="line.255"></a> +<span class="sourceLineNo">256</span> // too high. The reason we use a relaxed restriction for the real-world<a name="line.256"></a> +<span class="sourceLineNo">257</span> // case as opposed to the "fake lookup" case is that our hash functions<a name="line.257"></a> +<span class="sourceLineNo">258</span> // are not completely independent.<a name="line.258"></a> +<span class="sourceLineNo">259</span><a name="line.259"></a> +<span class="sourceLineNo">260</span> double maxZValue = fakeLookupEnabled ? 1.96 : 2.5;<a name="line.260"></a> +<span class="sourceLineNo">261</span> validateFalsePosRate(falsePosRate, nTrials, maxZValue, cbf,<a name="line.261"></a> +<span class="sourceLineNo">262</span> fakeLookupModeStr);<a name="line.262"></a> +<span class="sourceLineNo">263</span><a name="line.263"></a> +<span class="sourceLineNo">264</span> // For checking the lower bound we need to eliminate the last chunk,<a name="line.264"></a> +<span class="sourceLineNo">265</span> // because it is frequently smaller and the false positive rate in it<a name="line.265"></a> +<span class="sourceLineNo">266</span> // is too low. This does not help if there is only one under-sized<a name="line.266"></a> +<span class="sourceLineNo">267</span> // chunk, though.<a name="line.267"></a> +<span class="sourceLineNo">268</span> int nChunks = cbf.getNumChunks();<a name="line.268"></a> +<span class="sourceLineNo">269</span> if (nChunks > 1) {<a name="line.269"></a> +<span class="sourceLineNo">270</span> numFalsePos -= cbf.getNumPositivesForTesting(nChunks - 1);<a name="line.270"></a> +<span class="sourceLineNo">271</span> nTrials -= cbf.getNumQueriesForTesting(nChunks - 1);<a name="line.271"></a> +<span class="sourceLineNo">272</span> falsePosRate = numFalsePos * 1.0 / nTrials;<a name="line.272"></a> +<span class="sourceLineNo">273</span> LOG.info(testIdMsg + " False positive rate without last chunk is " +<a name="line.273"></a> +<span class="sourceLineNo">274</span> falsePosRate + fakeLookupModeStr);<a name="line.274"></a> +<span class="sourceLineNo">275</span> }<a name="line.275"></a> +<span class="sourceLineNo">276</span><a name="line.276"></a> +<span class="sourceLineNo">277</span> validateFalsePosRate(falsePosRate, nTrials, -2.58, cbf,<a name="line.277"></a> +<span class="sourceLineNo">278</span> fakeLookupModeStr);<a name="line.278"></a> +<span class="sourceLineNo">279</span> } finally {<a name="line.279"></a> +<span class="sourceLineNo">280</span> BloomFilterUtil.setRandomGeneratorForTest(null);<a name="line.280"></a> +<span class="sourceLineNo">281</span> }<a name="line.281"></a> +<span class="sourceLineNo">282</span> }<a name="line.282"></a> +<span class="sourceLineNo">283</span><a name="line.283"></a> +<span class="sourceLineNo">284</span> r.close(true); // end of test so evictOnClose<a name="line.284"></a> +<span class="sourceLineNo">285</span> }<a name="line.285"></a> +<span class="sourceLineNo">286</span><a name="line.286"></a> +<span class="sourceLineNo">287</span> private boolean isInBloom(StoreFileScanner scanner, byte[] row, BloomType bt,<a name="line.287"></a> +<span class="sourceLineNo">288</span> Random rand) {<a name="line.288"></a> +<span class="sourceLineNo">289</span> return isInBloom(scanner, row, RandomKeyValueUtil.randomRowOrQualifier(rand));<a name="line.289"></a> +<span class="sourceLineNo">290</span> }<a name="line.290"></a> +<span class="sourceLineNo">291</span><a name="line.291"></a> +<span class="sourceLineNo">292</span> private boolean isInBloom(StoreFileScanner scanner, byte[] row,<a name="line.292"></a> +<span class="sourceLineNo">293</span> byte[] qualifier) {<a name="line.293"></a> +<span class="sourceLineNo">294</span> Scan scan = new Scan().withStartRow(row).withStopRow(row, true);<a name="line.294"></a> +<span class="sourceLineNo">295</span> scan.addColumn(Bytes.toBytes(RandomKeyValueUtil.COLUMN_FAMILY_NAME), qualifier);<a name="line.295"></a> +<span class="sourceLineNo">296</span> HStore store = mock(HStore.class);<a name="line.296"></a> +<span class="sourceLineNo">297</span> when(store.getColumnFamilyDescriptor())<a name="line.297"></a> +<span class="sourceLineNo">298</span> .thenReturn(ColumnFamilyDescriptorBuilder.of(RandomKeyValueUtil.COLUMN_FAMILY_NAME));<a name="line.298"></a> +<span class="sourceLineNo">299</span> return scanner.shouldUseScanner(scan, store, Long.MIN_VALUE);<a name="line.299"></a> +<span class="sourceLineNo">300</span> }<a name="line.300"></a> +<span class="sourceLineNo">301</span><a name="line.301"></a> +<span class="sourceLineNo">302</span> private Path writeStoreFile(int t, BloomType bt, List<KeyValue> kvs)<a name="line.302"></a> +<span class="sourceLineNo">303</span> throws IOException {<a name="line.303"></a> +<span class="sourceLineNo">304</span> conf.setInt(BloomFilterFactory.IO_STOREFILE_BLOOM_BLOCK_SIZE,<a name="line.304"></a> +<span class="sourceLineNo">305</span> BLOOM_BLOCK_SIZES[t]);<a name="line.305"></a> +<span class="sourceLineNo">306</span> conf.setBoolean(CacheConfig.CACHE_BLOCKS_ON_WRITE_KEY, true);<a name="line.306"></a> +<span class="sourceLineNo">307</span> cacheConf = new CacheConfig(conf);<a name="line.307"></a> +<span class="sourceLineNo">308</span> HFileContext meta = new HFileContextBuilder().withBlockSize(BLOCK_SIZES[t]).build();<a name="line.308"></a> +<span class="sourceLineNo">309</span> StoreFileWriter w = new StoreFileWriter.Builder(conf, cacheConf, fs)<a name="line.309"></a> +<span class="sourceLineNo">310</span> .withOutputDir(TEST_UTIL.getDataTestDir())<a name="line.310"></a> +<span class="sourceLineNo">311</span> .withBloomType(bt)<a name="line.311"></a> +<span class="sourceLineNo">312</span> .withFileContext(meta)<a name="line.312"></a> +<span class="sourceLineNo">313</span> .build();<a name="line.313"></a> +<span class="sourceLineNo">314</span><a name="line.314"></a> +<span class="sourceLineNo">315</span> assertTrue(w.hasGeneralBloom());<a name="line.315"></a> +<span class="sourceLineNo">316</span> assertTrue(w.getGeneralBloomWriter() instanceof CompoundBloomFilterWriter);<a name="line.316"></a> +<span class="sourceLineNo">317</span> CompoundBloomFilterWriter cbbf =<a name="line.317"></a> +<span class="sourceLineNo">318</span> (CompoundBloomFilterWriter) w.getGeneralBloomWriter();<a name="line.318"></a> +<span class="sourceLineNo">319</span><a name="line.319"></a> +<span class="sourceLineNo">320</span> int keyCount = 0;<a name="line.320"></a> +<span class="sourceLineNo">321</span> KeyValue prev = null;<a name="line.321"></a> +<span class="sourceLineNo">322</span> LOG.debug("Total keys/values to insert: " + kvs.size());<a name="line.322"></a> +<span class="sourceLineNo">323</span> for (KeyValue kv : kvs) {<a name="line.323"></a> +<span class="sourceLineNo">324</span> w.append(kv);<a name="line.324"></a> +<span class="sourceLineNo">325</span><a name="line.325"></a> +<span class="sourceLineNo">326</span> // Validate the key count in the Bloom filter.<a name="line.326"></a> +<span class="sourceLineNo">327</span> boolean newKey = true;<a name="line.327"></a> +<span class="sourceLineNo">328</span> if (prev != null) {<a name="line.328"></a> +<span class="sourceLineNo">329</span> newKey = !(bt == BloomType.ROW ? CellUtil.matchingRows(kv,<a name="line.329"></a> +<span class="sourceLineNo">330</span> prev) : CellUtil.matchingRowColumn(kv, prev));<a name="line.330"></a> +<span class="sourceLineNo">331</span> }<a name="line.331"></a> +<span class="sourceLineNo">332</span> if (newKey)<a name="line.332"></a> +<span class="sourceLineNo">333</span> ++keyCount;<a name="line.333"></a> +<span class="sourceLineNo">334</span> assertEquals(keyCount, cbbf.getKeyCount());<a name="line.334"></a> +<span class="sourceLineNo">335</span><a name="line.335"></a> +<span class="sourceLineNo">336</span> prev = kv;<a name="line.336"></a> +<span class="sourceLineNo">337</span> }<a name="line.337"></a> +<span class="sourceLineNo">338</span> w.close();<a name="line.338"></a> +<span class="sourceLineNo">339</span><a name="line.339"></a> +<span class="sourceLineNo">340</span> return w.getPath();<a name="line.340"></a> +<span class="sourceLineNo">341</span> }<a name="line.341"></a> +<span class="sourceLineNo">342</span><a name="line.342"></a> +<span class="sourceLineNo">343</span> @Test<a name="line.343"></a> +<span class="sourceLineNo">344</span> public void testCompoundBloomSizing() {<a name="line.344"></a> +<span class="sourceLineNo">345</span> int bloomBlockByteSize = 4096;<a name="line.345"></a> +<span class="sourceLineNo">346</span> int bloomBlockBitSize = bloomBlockByteSize * 8;<a name="line.346"></a> +<span class="sourceLineNo">347</span> double targetErrorRate = 0.01;<a name="line.347"></a> +<span class="sourceLineNo">348</span> long maxKeysPerChunk = BloomFilterUtil.idealMaxKeys(bloomBlockBitSize,<a name="line.348"></a> +<span class="sourceLineNo">349</span> targetErrorRate);<a name="line.349"></a> +<span class="sourceLineNo">350</span><a name="line.350"></a> +<span class="sourceLineNo">351</span> long bloomSize1 = bloomBlockByteSize * 8;<a name="line.351"></a> +<span class="sourceLineNo">352</span> long bloomSize2 = BloomFilterUtil.computeBitSize(maxKeysPerChunk,<a name="line.352"></a> +<span class="sourceLineNo">353</span> targetErrorRate);<a name="line.353"></a> +<span class="sourceLineNo">354</span><a name="line.354"></a> +<span class="sourceLineNo">355</span> double bloomSizeRatio = (bloomSize2 * 1.0 / bloomSize1);<a name="line.355"></a> +<span class="sourceLineNo">356</span> assertTrue(Math.abs(bloomSizeRatio - 0.9999) < 0.0001);<a name="line.356"></a> +<span class="sourceLineNo">357</span> }<a name="line.357"></a> +<span class="sourceLineNo">358</span><a name="line.358"></a> +<span class="sourceLineNo">359</span> @Test<a name="line.359"></a> +<span class="sourceLineNo">360</span> public void testCreateKey() {<a name="line.360"></a> +<span class="sourceLineNo">361</span> byte[] row = "myRow".getBytes();<a name="line.361"></a> +<span class="sourceLineNo">362</span> byte[] qualifier = "myQualifier".getBytes();<a name="line.362"></a> +<span class="sourceLineNo">363</span> // Mimic what Storefile.createBloomKeyValue() does<a name="line.363"></a> +<span class="sourceLineNo">364</span> byte[] rowKey = KeyValueUtil.createFirstOnRow(row, 0, row.length, new byte[0], 0, 0, row, 0, 0).getKey();<a name="line.364"></a> +<span class="sourceLineNo">365</span> byte[] rowColKey = KeyValueUtil.createFirstOnRow(row, 0, row.length,<a name="line.365"></a> +<span class="sourceLineNo">366</span> new byte[0], 0, 0, qualifier, 0, qualifier.length).getKey();<a name="line.366"></a> +<span class="sourceLineNo">367</span> KeyValue rowKV = KeyValueUtil.createKeyValueFromKey(rowKey);<a name="line.367"></a> +<span class="sourceLineNo">368</span> KeyValue rowColKV = KeyValueUtil.createKeyValueFromKey(rowColKey);<a name="line.368"></a> +<span class="sourceLineNo">369</span> assertEquals(rowKV.getTimestamp(), rowColKV.getTimestamp());<a name="line.369"></a> +<span class="sourceLineNo">370</span> assertEquals(Bytes.toStringBinary(rowKV.getRowArray(), rowKV.getRowOffset(),<a name="line.370"></a> +<span class="sourceLineNo">371</span> rowKV.getRowLength()), Bytes.toStringBinary(rowColKV.getRowArray(), rowColKV.getRowOffset(),<a name="line.371"></a> +<span class="sourceLineNo">372</span> rowColKV.getRowLength()));<a name="line.372"></a> +<span class="sourceLineNo">373</span> assertEquals(0, rowKV.getQualifierLength());<a name="line.373"></a> +<span class="sourceLineNo">374</span> }<a name="line.374"></a> +<span class="sourceLineNo">375</span><a name="line.375"></a> <span class="sourceLineNo">376</span><a name="line.376"></a> +<span class="sourceLineNo">377</span>}<a name="line.377"></a> +<span class="sourceLineNo">378</span><a name="line.378"></a>