Author: jbellis
Date: Wed Dec 14 02:18:44 2011
New Revision: 1214034
URL: http://svn.apache.org/viewvc?rev=1214034&view=rev
Log:
clean up OpenBitSet
patch by jbellis; reviewed by slebresne for CASSANDRA-3622
Modified:
cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java
cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java
Modified: cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java
URL:
http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java?rev=1214034&r1=1214033&r2=1214034&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java
(original)
+++ cassandra/trunk/src/java/org/apache/cassandra/utils/BloomFilter.java Wed
Dec 14 02:18:44 2011
@@ -113,7 +113,7 @@ public class BloomFilter extends Filter
{
for (long bucketIndex : getHashBuckets(key))
{
- bitset.fastSet(bucketIndex);
+ bitset.set(bucketIndex);
}
}
@@ -121,7 +121,7 @@ public class BloomFilter extends Filter
{
for (long bucketIndex : getHashBuckets(key))
{
- if (!bitset.fastGet(bucketIndex))
+ if (!bitset.get(bucketIndex))
{
return false;
}
Modified:
cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java
URL:
http://svn.apache.org/viewvc/cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java?rev=1214034&r1=1214033&r2=1214034&view=diff
==============================================================================
--- cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java
(original)
+++ cassandra/trunk/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java Wed
Dec 14 02:18:44 2011
@@ -21,8 +21,10 @@ import java.util.Arrays;
import java.io.Serializable;
import java.util.BitSet;
-/** An "open" BitSet implementation that allows direct access to the array of
words
- * storing the bits.
+/**
+ * An "open" BitSet implementation that allows direct access to the arrays of
words
+ * storing the bits. Derived from Lucene's OpenBitSet, but with a paged
backing array
+ * (see bits delaration, below).
* <p/>
* Unlike java.util.bitset, the fact that bits are packed into an array of
longs
* is part of the interface. This allows efficient implementation of other
algorithms
@@ -39,77 +41,38 @@ import java.util.BitSet;
* hence people re-implement their own version in order to get better
performance).
* If you want a "safe", totally encapsulated (and slower and limited) BitSet
* class, use <code>java.util.BitSet</code>.
- * <p/>
- * <h3>Performance Results</h3>
- *
- Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M
-<br/>BitSet size = 1,000,000
-<br/>Results are java.util.BitSet time divided by OpenBitSet time.
-<table border="1">
- <tr>
- <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th>
<th>nextSetBit</th> <th>get</th> <th>iterator</th>
- </tr>
- <tr>
- <th>50% full</th> <td>3.36</td> <td>3.96</td> <td>1.44</td> <td>1.46</td>
<td>1.99</td> <td>1.58</td>
- </tr>
- <tr>
- <th>1% full</th> <td>3.31</td> <td>3.90</td> <td> </td> <td>1.04</td>
<td> </td> <td>0.99</td>
- </tr>
-</table>
-<br/>
-Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
-<br/>BitSet size = 1,000,000
-<br/>Results are java.util.BitSet time divided by OpenBitSet time.
-<table border="1">
- <tr>
- <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th>
<th>nextSetBit</th> <th>get</th> <th>iterator</th>
- </tr>
- <tr>
- <th>50% full</th> <td>2.50</td> <td>3.50</td> <td>1.00</td> <td>1.03</td>
<td>1.12</td> <td>1.25</td>
- </tr>
- <tr>
- <th>1% full</th> <td>2.51</td> <td>3.49</td> <td> </td> <td>1.00</td>
<td> </td> <td>1.02</td>
- </tr>
-</table>
*/
public class OpenBitSet implements Serializable {
- protected long[][] bits;
- protected int wlen; // number of words (elements) used in the array
- private final int pageCount;
/**
- * length of bits[][] page in long[] elements.
- * Choosing unform size for all sizes of bitsets fight fragmentation for
very large
- * bloom filters.
+ * We break the bitset up into multiple arrays to avoid promotion failure
caused by attempting to allocate
+ * large, contiguous arrays (CASSANDRA-2466). All sub-arrays but the last
are uniformly PAGE_SIZE words;
+ * to avoid waste in small bloom filters (of which Cassandra has many: one
per row) the last sub-array
+ * is sized to exactly the remaining number of words required to achieve the
desired set size (CASSANDRA-3618).
*/
- protected static final int PAGE_SIZE= 4096;
+ private final long[][] bits;
+ private int wlen; // number of words (elements) used in the array
+ private final int pageCount;
+ private static final int PAGE_SIZE = 4096;
- /** Constructs an OpenBitSet large enough to hold numBits.
- *
+ /**
+ * Constructs an OpenBitSet large enough to hold numBits.
* @param numBits
*/
public OpenBitSet(long numBits)
{
- this(numBits,true);
- }
-
- public OpenBitSet(long numBits, boolean allocatePages)
- {
- wlen= bits2words(numBits);
- int lastPageSize = wlen % PAGE_SIZE;
- int fullPageCount = wlen / PAGE_SIZE;
- pageCount = fullPageCount + (lastPageSize == 0 ? 0 : 1);
-
- bits = new long[pageCount][];
+ wlen = bits2words(numBits);
+ int lastPageSize = wlen % PAGE_SIZE;
+ int fullPageCount = wlen / PAGE_SIZE;
+ pageCount = fullPageCount + (lastPageSize == 0 ? 0 : 1);
- if (allocatePages)
- {
- for (int i = 0; i < fullPageCount; ++i)
- bits[i] = new long[PAGE_SIZE];
+ bits = new long[pageCount][];
- if (lastPageSize != 0)
- bits[bits.length - 1] = new long[lastPageSize];
- }
+ for (int i = 0; i < fullPageCount; ++i)
+ bits[i] = new long[PAGE_SIZE];
+
+ if (lastPageSize != 0)
+ bits[bits.length - 1] = new long[lastPageSize];
}
public OpenBitSet() {
@@ -164,24 +127,11 @@ public class OpenBitSet implements Seria
public int getNumWords() { return wlen; }
- /** Returns true or false for the specified bit index. */
- public boolean get(int index) {
- int i = index >> 6; // div 64
- // signed shift will keep a negative index and force an
- // array-index-out-of-bounds-exception, removing the need for an explicit
check.
- if (i>=wlen) return false;
-
- int bit = index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- // TODO perfectionist one can implement this using bit operations
- return (bits[i / PAGE_SIZE ][i % PAGE_SIZE] & bitmask) != 0;
- }
-
-
- /** Returns true or false for the specified bit index.
+ /**
+ * Returns true or false for the specified bit index.
* The index should be less than the OpenBitSet size
*/
- public boolean fastGet(int index) {
+ public boolean get(int index) {
int i = index >> 6; // div 64
// signed shift will keep a negative index and force an
// array-index-out-of-bounds-exception, removing the need for an explicit
check.
@@ -191,23 +141,11 @@ public class OpenBitSet implements Seria
return (bits[i / PAGE_SIZE][i % PAGE_SIZE ] & bitmask) != 0;
}
-
-
- /** Returns true or false for the specified bit index
- */
- public boolean get(long index) {
- int i = (int)(index >> 6); // div 64
- if (i>=wlen) return false;
- int bit = (int)index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- // TODO perfectionist one can implement this using bit operations
- return (bits[i / PAGE_SIZE][i % PAGE_SIZE ] & bitmask) != 0;
- }
-
- /** Returns true or false for the specified bit index.
+ /**
+ * Returns true or false for the specified bit index.
* The index should be less than the OpenBitSet size.
*/
- public boolean fastGet(long index) {
+ public boolean get(long index) {
int i = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
@@ -224,81 +162,33 @@ public class OpenBitSet implements Seria
return ((int)(bits[i / PAGE_SIZE][i % PAGE_SIZE ]>>>bit)) & 0x01;
}
-
- /** sets a bit, expanding the set size if necessary */
+ /**
+ * Sets the bit at the specified index.
+ * The index should be less than the OpenBitSet size.
+ */
public void set(long index) {
- int wordNum = expandingWordNum(index);
+ int wordNum = (int)(index >> 6);
int bit = (int)index & 0x3f;
long bitmask = 1L << bit;
bits[ wordNum / PAGE_SIZE ][ wordNum % PAGE_SIZE ] |= bitmask;
}
-
- /** Sets the bit at the specified index.
- * The index should be less than the OpenBitSet size.
- */
- public void fastSet(int index) {
+ /**
+ * Sets the bit at the specified index.
+ * The index should be less than the OpenBitSet size.
+ */
+ public void set(int index) {
int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[ wordNum / PAGE_SIZE ][ wordNum % PAGE_SIZE ] |= bitmask;
}
- /** Sets the bit at the specified index.
- * The index should be less than the OpenBitSet size.
- */
- public void fastSet(long index) {
- int wordNum = (int)(index >> 6);
- int bit = (int)index & 0x3f;
- long bitmask = 1L << bit;
- bits[ wordNum / PAGE_SIZE ][ wordNum % PAGE_SIZE ] |= bitmask;
- }
-
- /** Sets a range of bits, expanding the set size if necessary
- *
- * @param startIndex lower index
- * @param endIndex one-past the last bit to set
- */
- public void set(long startIndex, long endIndex) {
- if (endIndex <= startIndex) return;
-
- int startWord = (int)(startIndex>>6);
-
- // since endIndex is one past the end, this is index of the last
- // word to be changed.
- int endWord = expandingWordNum(endIndex-1);
-
- long startmask = -1L << startIndex;
- long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as
-endIndex due to wrap
-
- if (startWord == endWord) {
- bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] |= (startmask &
endmask);
- return;
- }
-
- assert startWord / PAGE_SIZE == endWord / PAGE_SIZE : "cross page sets not
suppotred at all - they are not used";
-
- bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] |= startmask;
- Arrays.fill(bits[ startWord / PAGE_SIZE], (startWord+1) % PAGE_SIZE ,
endWord % PAGE_SIZE , -1L);
- bits[endWord / PAGE_SIZE][endWord % PAGE_SIZE] |= endmask;
- }
-
-
-
- protected int expandingWordNum(long index) {
- int wordNum = (int)(index >> 6);
- if (wordNum>=wlen) {
- ensureCapacity(index+1);
- wlen = wordNum+1;
- }
- return wordNum;
- }
-
-
- /** clears a bit.
+ /**
+ * clears a bit.
* The index should be less than the OpenBitSet size.
*/
- public void fastClear(int index) {
+ public void clear(int index) {
int wordNum = index >> 6;
int bit = index & 0x03f;
long bitmask = 1L << bit;
@@ -312,26 +202,19 @@ public class OpenBitSet implements Seria
// bits[word] &= Long.rotateLeft(0xfffffffe,bit);
}
- /** clears a bit.
+ /**
+ * clears a bit.
* The index should be less than the OpenBitSet size.
*/
- public void fastClear(long index) {
- int wordNum = (int)(index >> 6); // div 64
- int bit = (int)index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] &= ~bitmask;
- }
-
- /** clears a bit, allowing access beyond the current set size without
changing the size.*/
public void clear(long index) {
int wordNum = (int)(index >> 6); // div 64
- if (wordNum>=wlen) return;
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] &= ~bitmask;
}
- /** Clears a range of bits. Clearing past the end does not change the size
of the set.
+ /**
+ * Clears a range of bits. Clearing past the end does not change the size
of the set.
*
* @param startIndex lower index
* @param endIndex one-past the last bit to clear
@@ -448,26 +331,19 @@ public class OpenBitSet implements Seria
/** flips a bit.
* The index should be less than the OpenBitSet size.
*/
- public void fastFlip(int index) {
+ public void flip(int index) {
int wordNum = index >> 6; // div 64
int bit = index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] ^= bitmask;
}
- /** flips a bit.
+ /**
+ * flips a bit.
* The index should be less than the OpenBitSet size.
*/
- public void fastFlip(long index) {
- int wordNum = (int)(index >> 6); // div 64
- int bit = (int)index & 0x3f; // mod 64
- long bitmask = 1L << bit;
- bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] ^= bitmask;
- }
-
- /** flips a bit, expanding the set size if necessary */
public void flip(long index) {
- int wordNum = expandingWordNum(index);
+ int wordNum = (int)(index >> 6); // div 64
int bit = (int)index & 0x3f; // mod 64
long bitmask = 1L << bit;
bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] ^= bitmask;
@@ -495,44 +371,6 @@ public class OpenBitSet implements Seria
return (bits[wordNum / PAGE_SIZE][wordNum % PAGE_SIZE] & bitmask) != 0;
}
- /** Flips a range of bits, expanding the set size if necessary
- *
- * @param startIndex lower index
- * @param endIndex one-past the last bit to flip
- */
- public void flip(long startIndex, long endIndex) {
- if (endIndex <= startIndex) return;
- int startWord = (int)(startIndex>>6);
-
- // since endIndex is one past the end, this is index of the last
- // word to be changed.
- int endWord = expandingWordNum(endIndex-1);
-
- /*** Grrr, java shifting wraps around so -1L>>>64 == -1
- * for that reason, make sure not to use endmask if the bits to flip will
- * be zero in the last word (redefine endWord to be the last changed...)
- long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000
- long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111
- ***/
-
- long startmask = -1L << startIndex;
- long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as
-endIndex due to wrap
-
- if (startWord == endWord) {
- bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] ^= (startmask &
endmask);
- return;
- }
-
- bits[startWord / PAGE_SIZE][startWord % PAGE_SIZE] ^= startmask;
-
- for (int i=startWord+1; i<endWord; i++) {
- bits[i / PAGE_SIZE][ i % PAGE_SIZE] = ~bits[i / PAGE_SIZE][ i %
PAGE_SIZE];
- }
-
- bits[endWord / PAGE_SIZE][endWord % PAGE_SIZE] ^= endmask;
- }
-
-
/** @return the number of set bits */
public long cardinality()
{
@@ -613,22 +451,6 @@ public class OpenBitSet implements Seria
intersect(other);
}
-
- /** Expand the long[] with the size given as a number of words (64 bit
longs).
- * getNumWords() is unchanged by this call.
- */
- public void ensureCapacityWords(int numWords)
- {
- assert numWords<=wlen : "Growing of paged bitset is not supported";
- }
-
- /** Ensure that the long[] is big enough to hold numBits, expanding it if
necessary.
- * getNumWords() is unchanged by this call.
- */
- public void ensureCapacity(long numBits) {
- ensureCapacityWords(bits2words(numBits));
- }
-
/** Lowers numWords, the number of words in use,
* by checking for trailing zero words.
*/