Added: 
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java?rev=1052542&view=auto
==============================================================================
--- 
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java
 (added)
+++ 
cassandra/branches/cassandra-0.7/src/java/org/apache/cassandra/utils/obs/OpenBitSet.java
 Fri Dec 24 17:58:22 2010
@@ -0,0 +1,818 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.utils.obs;
+
+import java.util.Arrays;
+import java.io.Serializable;
+import java.util.BitSet;
+
+/** An "open" BitSet implementation that allows direct access to the array of 
words
+ * storing the bits.
+ * <p/>
+ * Unlike java.util.bitset, the fact that bits are packed into an array of 
longs
+ * is part of the interface.  This allows efficient implementation of other 
algorithms
+ * by someone other than the author.  It also allows one to efficiently 
implement
+ * alternate serialization or interchange formats.
+ * <p/>
+ * <code>OpenBitSet</code> is faster than <code>java.util.BitSet</code> in 
most operations
+ * and *much* faster at calculating cardinality of sets and results of set 
operations.
+ * It can also handle sets of larger cardinality (up to 64 * 2**32-1)
+ * <p/>
+ * The goals of <code>OpenBitSet</code> are the fastest implementation 
possible, and
+ * maximum code reuse.  Extra safety and encapsulation
+ * may always be built on top, but if that's built in, the cost can never be 
removed (and
+ * hence people re-implement their own version in order to get better 
performance).
+ * If you want a "safe", totally encapsulated (and slower and limited) BitSet
+ * class, use <code>java.util.BitSet</code>.
+ * <p/>
+ * <h3>Performance Results</h3>
+ *
+ Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+<br/>BitSet size = 1,000,000
+<br/>Results are java.util.BitSet time divided by OpenBitSet time.
+<table border="1">
+ <tr>
+  <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> 
<th>nextSetBit</th> <th>get</th> <th>iterator</th>
+ </tr>
+ <tr>
+  <th>50% full</th> <td>3.36</td> <td>3.96</td> <td>1.44</td> <td>1.46</td> 
<td>1.99</td> <td>1.58</td>
+ </tr>
+ <tr>
+   <th>1% full</th> <td>3.31</td> <td>3.90</td> <td>&nbsp;</td> <td>1.04</td> 
<td>&nbsp;</td> <td>0.99</td>
+ </tr>
+</table>
+<br/>
+Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+<br/>BitSet size = 1,000,000
+<br/>Results are java.util.BitSet time divided by OpenBitSet time.
+<table border="1">
+ <tr>
+  <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> 
<th>nextSetBit</th> <th>get</th> <th>iterator</th>
+ </tr>
+ <tr>
+  <th>50% full</th> <td>2.50</td> <td>3.50</td> <td>1.00</td> <td>1.03</td> 
<td>1.12</td> <td>1.25</td>
+ </tr>
+ <tr>
+   <th>1% full</th> <td>2.51</td> <td>3.49</td> <td>&nbsp;</td> <td>1.00</td> 
<td>&nbsp;</td> <td>1.02</td>
+ </tr>
+</table>
+ */
+
+public class OpenBitSet implements Cloneable, Serializable {
+  protected long[] bits;
+  protected int wlen;   // number of words (elements) used in the array
+
+  /** Constructs an OpenBitSet large enough to hold numBits.
+   *
+   * @param numBits
+   */
+  public OpenBitSet(long numBits) {
+    bits = new long[bits2words(numBits)];
+    wlen = bits.length;
+  }
+
+  public OpenBitSet() {
+    this(64);
+  }
+
+  /** Constructs an OpenBitSet from an existing long[].
+   * <br/>
+   * The first 64 bits are in long[0],
+   * with bit index 0 at the least significant bit, and bit index 63 at the 
most significant.
+   * Given a bit index,
+   * the word containing it is long[index/64], and it is at bit number 
index%64 within that word.
+   * <p>
+   * numWords are the number of elements in the array that contain
+   * set bits (non-zero longs).
+   * numWords should be &lt= bits.length, and
+   * any existing words in the array at position &gt= numWords should be zero.
+   *
+   */
+  public OpenBitSet(long[] bits, int numWords) {
+    this.bits = bits;
+    this.wlen = numWords;
+  }
+
+
+  /** Contructs an OpenBitset from a BitSet
+  */
+  public OpenBitSet(BitSet bits) {
+    this(bits.length());
+  }
+
+  /** Returns the current capacity in bits (1 greater than the index of the 
last bit) */
+  public long capacity() { return bits.length << 6; }
+
+ /**
+  * Returns the current capacity of this set.  Included for
+  * compatibility.  This is *not* equal to {...@link #cardinality}
+  */
+  public long size() {
+      return capacity();
+  }
+
+  // @Override -- not until Java 1.6
+  public int length() {
+    return bits.length << 6;
+  }
+
+  /** Returns true if there are no set bits */
+  public boolean isEmpty() { return cardinality()==0; }
+
+  /** Expert: returns the long[] storing the bits */
+  public long[] getBits() { return bits; }
+
+  /** Expert: sets a new long[] to use as the bit storage */
+  public void setBits(long[] bits) { this.bits = bits; }
+
+  /** Expert: gets the number of longs in the array that are in use */
+  public int getNumWords() { return wlen; }
+
+  /** Expert: sets the number of longs in the array that are in use */
+  public void setNumWords(int nWords) { this.wlen=nWords; }
+
+
+
+  /** Returns true or false for the specified bit index. */
+  public boolean get(int index) {
+    int i = index >> 6;               // div 64
+    // signed shift will keep a negative index and force an
+    // array-index-out-of-bounds-exception, removing the need for an explicit 
check.
+    if (i>=bits.length) return false;
+
+    int bit = index & 0x3f;           // mod 64
+    long bitmask = 1L << bit;
+    return (bits[i] & bitmask) != 0;
+  }
+
+
+ /** Returns true or false for the specified bit index.
+   * The index should be less than the OpenBitSet size
+   */
+  public boolean fastGet(int index) {
+    int i = index >> 6;               // div 64
+    // signed shift will keep a negative index and force an
+    // array-index-out-of-bounds-exception, removing the need for an explicit 
check.
+    int bit = index & 0x3f;           // mod 64
+    long bitmask = 1L << bit;
+    return (bits[i] & bitmask) != 0;
+  }
+
+
+
+ /** Returns true or false for the specified bit index
+  */
+  public boolean get(long index) {
+    int i = (int)(index >> 6);             // div 64
+    if (i>=bits.length) return false;
+    int bit = (int)index & 0x3f;           // mod 64
+    long bitmask = 1L << bit;
+    return (bits[i] & bitmask) != 0;
+  }
+
+  /** Returns true or false for the specified bit index.
+   * The index should be less than the OpenBitSet size.
+   */
+  public boolean fastGet(long index) {
+    int i = (int)(index >> 6);               // div 64
+    int bit = (int)index & 0x3f;           // mod 64
+    long bitmask = 1L << bit;
+    return (bits[i] & bitmask) != 0;
+  }
+
+  /*
+  // alternate implementation of get()
+  public boolean get1(int index) {
+    int i = index >> 6;                // div 64
+    int bit = index & 0x3f;            // mod 64
+    return ((bits[i]>>>bit) & 0x01) != 0;
+    // this does a long shift and a bittest (on x86) vs
+    // a long shift, and a long AND, (the test for zero is prob a no-op)
+    // testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0;
+  }
+  */
+
+
+  /** returns 1 if the bit is set, 0 if not.
+   * The index should be less than the OpenBitSet size
+   */
+  public int getBit(int index) {
+    int i = index >> 6;                // div 64
+    int bit = index & 0x3f;            // mod 64
+    return ((int)(bits[i]>>>bit)) & 0x01;
+  }
+
+
+  /*
+  public boolean get2(int index) {
+    int word = index >> 6;            // div 64
+    int bit = index & 0x0000003f;     // mod 64
+    return (bits[word] << bit) < 0;   // hmmm, this would work if bit order 
were reversed
+    // we could right shift and check for parity bit, if it was available to 
us.
+  }
+  */
+
+  /** sets a bit, expanding the set size if necessary */
+  public void set(long index) {
+    int wordNum = expandingWordNum(index);
+    int bit = (int)index & 0x3f;
+    long bitmask = 1L << bit;
+    bits[wordNum] |= bitmask;
+  }
+
+
+ /** Sets the bit at the specified index.
+  * The index should be less than the OpenBitSet size.
+  */
+  public void fastSet(int index) {
+    int wordNum = index >> 6;      // div 64
+    int bit = index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] |= bitmask;
+  }
+
+ /** Sets the bit at the specified index.
+  * The index should be less than the OpenBitSet size.
+  */
+  public void fastSet(long index) {
+    int wordNum = (int)(index >> 6);
+    int bit = (int)index & 0x3f;
+    long bitmask = 1L << bit;
+    bits[wordNum] |= bitmask;
+  }
+
+  /** Sets a range of bits, expanding the set size if necessary
+   *
+   * @param startIndex lower index
+   * @param endIndex one-past the last bit to set
+   */
+  public void set(long startIndex, long endIndex) {
+    if (endIndex <= startIndex) return;
+
+    int startWord = (int)(startIndex>>6);
+
+    // since endIndex is one past the end, this is index of the last
+    // word to be changed.
+    int endWord   = expandingWordNum(endIndex-1);
+
+    long startmask = -1L << startIndex;
+    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as 
-endIndex due to wrap
+
+    if (startWord == endWord) {
+      bits[startWord] |= (startmask & endmask);
+      return;
+    }
+
+    bits[startWord] |= startmask;
+    Arrays.fill(bits, startWord+1, endWord, -1L);
+    bits[endWord] |= endmask;
+  }
+
+
+
+  protected int expandingWordNum(long index) {
+    int wordNum = (int)(index >> 6);
+    if (wordNum>=wlen) {
+      ensureCapacity(index+1);
+      wlen = wordNum+1;
+    }
+    return wordNum;
+  }
+
+
+  /** clears a bit.
+   * The index should be less than the OpenBitSet size.
+   */
+  public void fastClear(int index) {
+    int wordNum = index >> 6;
+    int bit = index & 0x03f;
+    long bitmask = 1L << bit;
+    bits[wordNum] &= ~bitmask;
+    // hmmm, it takes one more instruction to clear than it does to set... any
+    // way to work around this?  If there were only 63 bits per word, we could
+    // use a right shift of 10111111...111 in binary to position the 0 in the
+    // correct place (using sign extension).
+    // Could also use Long.rotateRight() or rotateLeft() *if* they were 
converted
+    // by the JVM into a native instruction.
+    // bits[word] &= Long.rotateLeft(0xfffffffe,bit);
+  }
+
+  /** clears a bit.
+   * The index should be less than the OpenBitSet size.
+   */
+  public void fastClear(long index) {
+    int wordNum = (int)(index >> 6); // div 64
+    int bit = (int)index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] &= ~bitmask;
+  }
+
+  /** clears a bit, allowing access beyond the current set size without 
changing the size.*/
+  public void clear(long index) {
+    int wordNum = (int)(index >> 6); // div 64
+    if (wordNum>=wlen) return;
+    int bit = (int)index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] &= ~bitmask;
+  }
+
+  /** Clears a range of bits.  Clearing past the end does not change the size 
of the set.
+   *
+   * @param startIndex lower index
+   * @param endIndex one-past the last bit to clear
+   */
+  public void clear(int startIndex, int endIndex) {
+    if (endIndex <= startIndex) return;
+
+    int startWord = (startIndex>>6);
+    if (startWord >= wlen) return;
+
+    // since endIndex is one past the end, this is index of the last
+    // word to be changed.
+    int endWord   = ((endIndex-1)>>6);
+
+    long startmask = -1L << startIndex;
+    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as 
-endIndex due to wrap
+
+    // invert masks since we are clearing
+    startmask = ~startmask;
+    endmask = ~endmask;
+
+    if (startWord == endWord) {
+      bits[startWord] &= (startmask | endmask);
+      return;
+    }
+
+    bits[startWord] &= startmask;
+
+    int middle = Math.min(wlen, endWord);
+    Arrays.fill(bits, startWord+1, middle, 0L);
+    if (endWord < wlen) {
+      bits[endWord] &= endmask;
+    }
+  }
+
+
+  /** Clears a range of bits.  Clearing past the end does not change the size 
of the set.
+   *
+   * @param startIndex lower index
+   * @param endIndex one-past the last bit to clear
+   */
+  public void clear(long startIndex, long endIndex) {
+    if (endIndex <= startIndex) return;
+
+    int startWord = (int)(startIndex>>6);
+    if (startWord >= wlen) return;
+
+    // since endIndex is one past the end, this is index of the last
+    // word to be changed.
+    int endWord   = (int)((endIndex-1)>>6);
+
+    long startmask = -1L << startIndex;
+    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as 
-endIndex due to wrap
+
+    // invert masks since we are clearing
+    startmask = ~startmask;
+    endmask = ~endmask;
+
+    if (startWord == endWord) {
+      bits[startWord] &= (startmask | endmask);
+      return;
+    }
+
+    bits[startWord] &= startmask;
+
+    int middle = Math.min(wlen, endWord);
+    Arrays.fill(bits, startWord+1, middle, 0L);
+    if (endWord < wlen) {
+      bits[endWord] &= endmask;
+    }
+  }
+
+
+
+  /** Sets a bit and returns the previous value.
+   * The index should be less than the OpenBitSet size.
+   */
+  public boolean getAndSet(int index) {
+    int wordNum = index >> 6;      // div 64
+    int bit = index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    boolean val = (bits[wordNum] & bitmask) != 0;
+    bits[wordNum] |= bitmask;
+    return val;
+  }
+
+  /** Sets a bit and returns the previous value.
+   * The index should be less than the OpenBitSet size.
+   */
+  public boolean getAndSet(long index) {
+    int wordNum = (int)(index >> 6);      // div 64
+    int bit = (int)index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    boolean val = (bits[wordNum] & bitmask) != 0;
+    bits[wordNum] |= bitmask;
+    return val;
+  }
+
+  /** flips a bit.
+   * The index should be less than the OpenBitSet size.
+   */
+  public void fastFlip(int index) {
+    int wordNum = index >> 6;      // div 64
+    int bit = index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] ^= bitmask;
+  }
+
+  /** flips a bit.
+   * The index should be less than the OpenBitSet size.
+   */
+  public void fastFlip(long index) {
+    int wordNum = (int)(index >> 6);   // div 64
+    int bit = (int)index & 0x3f;       // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] ^= bitmask;
+  }
+
+  /** flips a bit, expanding the set size if necessary */
+  public void flip(long index) {
+    int wordNum = expandingWordNum(index);
+    int bit = (int)index & 0x3f;       // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] ^= bitmask;
+  }
+
+  /** flips a bit and returns the resulting bit value.
+   * The index should be less than the OpenBitSet size.
+   */
+  public boolean flipAndGet(int index) {
+    int wordNum = index >> 6;      // div 64
+    int bit = index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] ^= bitmask;
+    return (bits[wordNum] & bitmask) != 0;
+  }
+
+  /** flips a bit and returns the resulting bit value.
+   * The index should be less than the OpenBitSet size.
+   */
+  public boolean flipAndGet(long index) {
+    int wordNum = (int)(index >> 6);   // div 64
+    int bit = (int)index & 0x3f;       // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] ^= bitmask;
+    return (bits[wordNum] & bitmask) != 0;
+  }
+
+  /** Flips a range of bits, expanding the set size if necessary
+   *
+   * @param startIndex lower index
+   * @param endIndex one-past the last bit to flip
+   */
+  public void flip(long startIndex, long endIndex) {
+    if (endIndex <= startIndex) return;
+    int startWord = (int)(startIndex>>6);
+
+    // since endIndex is one past the end, this is index of the last
+    // word to be changed.
+    int endWord   = expandingWordNum(endIndex-1);
+
+    /*** Grrr, java shifting wraps around so -1L>>>64 == -1
+     * for that reason, make sure not to use endmask if the bits to flip will
+     * be zero in the last word (redefine endWord to be the last changed...)
+    long startmask = -1L << (startIndex & 0x3f);     // example: 11111...111000
+    long endmask = -1L >>> (64-(endIndex & 0x3f));   // example: 00111...111111
+    ***/
+
+    long startmask = -1L << startIndex;
+    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as 
-endIndex due to wrap
+
+    if (startWord == endWord) {
+      bits[startWord] ^= (startmask & endmask);
+      return;
+    }
+
+    bits[startWord] ^= startmask;
+
+    for (int i=startWord+1; i<endWord; i++) {
+      bits[i] = ~bits[i];
+    }
+
+    bits[endWord] ^= endmask;
+  }
+
+
+  /*
+  public static int pop(long v0, long v1, long v2, long v3) {
+    // derived from pop_array by setting last four elems to 0.
+    // exchanges one pop() call for 10 elementary operations
+    // saving about 7 instructions... is there a better way?
+      long twosA=v0 & v1;
+      long ones=v0^v1;
+
+      long u2=ones^v2;
+      long twosB =(ones&v2)|(u2&v3);
+      ones=u2^v3;
+
+      long fours=(twosA&twosB);
+      long twos=twosA^twosB;
+
+      return (pop(fours)<<2)
+             + (pop(twos)<<1)
+             + pop(ones);
+
+  }
+  */
+
+
+  /** @return the number of set bits */
+  public long cardinality() {
+    return BitUtil.pop_array(bits,0,wlen);
+  }
+
+ /** Returns the popcount or cardinality of the intersection of the two sets.
+   * Neither set is modified.
+   */
+  public static long intersectionCount(OpenBitSet a, OpenBitSet b) {
+    return BitUtil.pop_intersect(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
+ }
+
+  /** Returns the popcount or cardinality of the union of the two sets.
+    * Neither set is modified.
+    */
+  public static long unionCount(OpenBitSet a, OpenBitSet b) {
+    long tot = BitUtil.pop_union(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
+    if (a.wlen < b.wlen) {
+      tot += BitUtil.pop_array(b.bits, a.wlen, b.wlen-a.wlen);
+    } else if (a.wlen > b.wlen) {
+      tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen);
+    }
+    return tot;
+  }
+
+  /** Returns the popcount or cardinality of "a and not b"
+   * or "intersection(a, not(b))".
+   * Neither set is modified.
+   */
+  public static long andNotCount(OpenBitSet a, OpenBitSet b) {
+    long tot = BitUtil.pop_andnot(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
+    if (a.wlen > b.wlen) {
+      tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen);
+    }
+    return tot;
+  }
+
+ /** Returns the popcount or cardinality of the exclusive-or of the two sets.
+  * Neither set is modified.
+  */
+  public static long xorCount(OpenBitSet a, OpenBitSet b) {
+    long tot = BitUtil.pop_xor(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
+    if (a.wlen < b.wlen) {
+      tot += BitUtil.pop_array(b.bits, a.wlen, b.wlen-a.wlen);
+    } else if (a.wlen > b.wlen) {
+      tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen);
+    }
+    return tot;
+  }
+
+
+  /** Returns the index of the first set bit starting at the index specified.
+   *  -1 is returned if there are no more set bits.
+   */
+  public int nextSetBit(int index) {
+    int i = index>>6;
+    if (i>=wlen) return -1;
+    int subIndex = index & 0x3f;      // index within the word
+    long word = bits[i] >> subIndex;  // skip all the bits to the right of 
index
+
+    if (word!=0) {
+      return (i<<6) + subIndex + BitUtil.ntz(word);
+    }
+
+    while(++i < wlen) {
+      word = bits[i];
+      if (word!=0) return (i<<6) + BitUtil.ntz(word);
+    }
+
+    return -1;
+  }
+
+  /** Returns the index of the first set bit starting at the index specified.
+   *  -1 is returned if there are no more set bits.
+   */
+  public long nextSetBit(long index) {
+    int i = (int)(index>>>6);
+    if (i>=wlen) return -1;
+    int subIndex = (int)index & 0x3f; // index within the word
+    long word = bits[i] >>> subIndex;  // skip all the bits to the right of 
index
+
+    if (word!=0) {
+      return (((long)i)<<6) + (subIndex + BitUtil.ntz(word));
+    }
+
+    while(++i < wlen) {
+      word = bits[i];
+      if (word!=0) return (((long)i)<<6) + BitUtil.ntz(word);
+    }
+
+    return -1;
+  }
+
+
+
+
+  @Override
+  public Object clone() {
+    try {
+      OpenBitSet obs = (OpenBitSet)super.clone();
+      obs.bits = obs.bits.clone();  // hopefully an array clone is as fast(er) 
than arraycopy
+      return obs;
+    } catch (CloneNotSupportedException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /** this = this AND other */
+  public void intersect(OpenBitSet other) {
+    int newLen= Math.min(this.wlen,other.wlen);
+    long[] thisArr = this.bits;
+    long[] otherArr = other.bits;
+    // testing against zero can be more efficient
+    int pos=newLen;
+    while(--pos>=0) {
+      thisArr[pos] &= otherArr[pos];
+    }
+    if (this.wlen > newLen) {
+      // fill zeros from the new shorter length to the old length
+      Arrays.fill(bits,newLen,this.wlen,0);
+    }
+    this.wlen = newLen;
+  }
+
+  /** this = this OR other */
+  public void union(OpenBitSet other) {
+    int newLen = Math.max(wlen,other.wlen);
+    ensureCapacityWords(newLen);
+
+    long[] thisArr = this.bits;
+    long[] otherArr = other.bits;
+    int pos=Math.min(wlen,other.wlen);
+    while(--pos>=0) {
+      thisArr[pos] |= otherArr[pos];
+    }
+    if (this.wlen < newLen) {
+      System.arraycopy(otherArr, this.wlen, thisArr, this.wlen, 
newLen-this.wlen);
+    }
+    this.wlen = newLen;
+  }
+
+
+  /** Remove all elements set in other. this = this AND_NOT other */
+  public void remove(OpenBitSet other) {
+    int idx = Math.min(wlen,other.wlen);
+    long[] thisArr = this.bits;
+    long[] otherArr = other.bits;
+    while(--idx>=0) {
+      thisArr[idx] &= ~otherArr[idx];
+    }
+  }
+
+  /** this = this XOR other */
+  public void xor(OpenBitSet other) {
+    int newLen = Math.max(wlen,other.wlen);
+    ensureCapacityWords(newLen);
+
+    long[] thisArr = this.bits;
+    long[] otherArr = other.bits;
+    int pos=Math.min(wlen,other.wlen);
+    while(--pos>=0) {
+      thisArr[pos] ^= otherArr[pos];
+    }
+    if (this.wlen < newLen) {
+      System.arraycopy(otherArr, this.wlen, thisArr, this.wlen, 
newLen-this.wlen);
+    }
+    this.wlen = newLen;
+  }
+
+
+  // some BitSet compatability methods
+
+  //** see {...@link intersect} */
+  public void and(OpenBitSet other) {
+    intersect(other);
+  }
+
+  //** see {...@link union} */
+  public void or(OpenBitSet other) {
+    union(other);
+  }
+
+  //** see {...@link andNot} */
+  public void andNot(OpenBitSet other) {
+    remove(other);
+  }
+
+  /** returns true if the sets have any elements in common */
+  public boolean intersects(OpenBitSet other) {
+    int pos = Math.min(this.wlen, other.wlen);
+    long[] thisArr = this.bits;
+    long[] otherArr = other.bits;
+    while (--pos>=0) {
+      if ((thisArr[pos] & otherArr[pos])!=0) return true;
+    }
+    return false;
+  }
+
+
+
+  /** Expand the long[] with the size given as a number of words (64 bit 
longs).
+   * getNumWords() is unchanged by this call.
+   */
+  public void ensureCapacityWords(int numWords) {
+    if (bits.length < numWords) {
+      bits = ArrayUtil.grow(bits, numWords);
+    }
+  }
+
+  /** Ensure that the long[] is big enough to hold numBits, expanding it if 
necessary.
+   * getNumWords() is unchanged by this call.
+   */
+  public void ensureCapacity(long numBits) {
+    ensureCapacityWords(bits2words(numBits));
+  }
+
+  /** Lowers numWords, the number of words in use,
+   * by checking for trailing zero words.
+   */
+  public void trimTrailingZeros() {
+    int idx = wlen-1;
+    while (idx>=0 && bits[idx]==0) idx--;
+    wlen = idx+1;
+  }
+
+  /** returns the number of 64 bit words it would take to hold numBits */
+  public static int bits2words(long numBits) {
+   return (int)(((numBits-1)>>>6)+1);
+  }
+
+
+  /** returns true if both sets have the same bits set */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (!(o instanceof OpenBitSet)) return false;
+    OpenBitSet a;
+    OpenBitSet b = (OpenBitSet)o;
+    // make a the larger set.
+    if (b.wlen > this.wlen) {
+      a = b; b=this;
+    } else {
+      a=this;
+    }
+
+    // check for any set bits out of the range of b
+    for (int i=a.wlen-1; i>=b.wlen; i--) {
+      if (a.bits[i]!=0) return false;
+    }
+
+    for (int i=b.wlen-1; i>=0; i--) {
+      if (a.bits[i] != b.bits[i]) return false;
+    }
+
+    return true;
+  }
+
+
+  @Override
+  public int hashCode() {
+    // Start with a zero hash and use a mix that results in zero if the input 
is zero.
+    // This effectively truncates trailing zeros without an explicit check.
+    long h = 0;
+    for (int i = bits.length; --i>=0;) {
+      h ^= bits[i];
+      h = (h << 1) | (h >>> 63); // rotate left
+    }
+    // fold leftmost bits into right and add a constant to prevent
+    // empty sets from returning 0, which is too common.
+    return (int)((h>>32) ^ h) + 0x98761234;
+  }
+
+}
+
+

Modified: 
cassandra/branches/cassandra-0.7/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java?rev=1052542&r1=1052541&r2=1052542&view=diff
==============================================================================
--- 
cassandra/branches/cassandra-0.7/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java
 (original)
+++ 
cassandra/branches/cassandra-0.7/test/long/org/apache/cassandra/utils/LongBloomFilterTest.java
 Fri Dec 24 17:58:22 2010
@@ -1,21 +1,21 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*    http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
 package org.apache.cassandra.utils;
 
 import java.io.IOException;
@@ -31,31 +31,35 @@ public class LongBloomFilterTest
      * NB: needs to run with -mx1G
      */
     @Test
-    public void testBigInt() {
+    public void testBigInt()
+    {
         int size = 10 * 1000 * 1000;
-        bf = BloomFilter.getFilter(size, FilterTest.spec.bucketsPerElement);
-        FilterTest.testFalsePositives(bf,
-                                      new KeyGenerator.IntGenerator(size),
-                                      new KeyGenerator.IntGenerator(size, size 
* 2));
+        bf = BloomFilter.getFilter(size, 
FilterTestHelper.spec.bucketsPerElement);
+        FilterTestHelper.testFalsePositives(bf,
+                                            new 
KeyGenerator.IntGenerator(size),
+                                            new 
KeyGenerator.IntGenerator(size, size * 2));
     }
 
     @Test
-    public void testBigRandom() {
+    public void testBigRandom()
+    {
         int size = 10 * 1000 * 1000;
-        bf = BloomFilter.getFilter(size, FilterTest.spec.bucketsPerElement);
-        FilterTest.testFalsePositives(bf,
-                                      new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
-                                      new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+        bf = BloomFilter.getFilter(size, 
FilterTestHelper.spec.bucketsPerElement);
+        FilterTestHelper.testFalsePositives(bf,
+                                            new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
+                                            new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
     }
 
     @Test
-    public void timeit() {
-        int size = 300 * FilterTest.ELEMENTS;
-        bf = BloomFilter.getFilter(size, FilterTest.spec.bucketsPerElement);
-        for (int i = 0; i < 10; i++) {
-            FilterTest.testFalsePositives(bf,
-                                          new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
-                                          new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+    public void timeit()
+    {
+        int size = 300 * FilterTestHelper.ELEMENTS;
+        bf = BloomFilter.getFilter(size, 
FilterTestHelper.spec.bucketsPerElement);
+        for (int i = 0; i < 10; i++)
+        {
+            FilterTestHelper.testFalsePositives(bf,
+                                                new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
+                                                new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
             bf.clear();
         }
     }

Added: 
cassandra/branches/cassandra-0.7/test/long/org/apache/cassandra/utils/LongLegacyBloomFilterTest.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/test/long/org/apache/cassandra/utils/LongLegacyBloomFilterTest.java?rev=1052542&view=auto
==============================================================================
--- 
cassandra/branches/cassandra-0.7/test/long/org/apache/cassandra/utils/LongLegacyBloomFilterTest.java
 (added)
+++ 
cassandra/branches/cassandra-0.7/test/long/org/apache/cassandra/utils/LongLegacyBloomFilterTest.java
 Fri Dec 24 17:58:22 2010
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.cassandra.utils;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.junit.Test;
+
+public class LongLegacyBloomFilterTest
+{
+    public LegacyBloomFilter bf;
+
+    /**
+     * NB: needs to run with -mx1G
+     */
+    @Test
+    public void testBigInt()
+    {
+        int size = 10 * 1000 * 1000;
+        bf = LegacyBloomFilter.getFilter(size, 
FilterTestHelper.spec.bucketsPerElement);
+        FilterTestHelper.testFalsePositives(bf,
+                                            new 
KeyGenerator.IntGenerator(size),
+                                            new 
KeyGenerator.IntGenerator(size, size * 2));
+    }
+
+    @Test
+    public void testBigRandom()
+    {
+        int size = 10 * 1000 * 1000;
+        bf = LegacyBloomFilter.getFilter(size, 
FilterTestHelper.spec.bucketsPerElement);
+        FilterTestHelper.testFalsePositives(bf,
+                                            new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
+                                            new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+    }
+
+    @Test
+    public void timeit()
+    {
+        int size = 300 * FilterTestHelper.ELEMENTS;
+        bf = LegacyBloomFilter.getFilter(size, 
FilterTestHelper.spec.bucketsPerElement);
+        for (int i = 0; i < 10; i++)
+        {
+            FilterTestHelper.testFalsePositives(bf,
+                                                new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size),
+                                                new 
KeyGenerator.RandomStringGenerator(new Random().nextInt(), size));
+            bf.clear();
+        }
+    }
+}

Modified: 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/io/LazilyCompactedRowTest.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/io/LazilyCompactedRowTest.java?rev=1052542&r1=1052541&r2=1052542&view=diff
==============================================================================
--- 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/io/LazilyCompactedRowTest.java
 (original)
+++ 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/io/LazilyCompactedRowTest.java
 Fri Dec 24 17:58:22 2010
@@ -81,8 +81,8 @@ public class LazilyCompactedRowTest exte
             assertEquals(out1.getLength(), rowSize1 + 8);
             assertEquals(out2.getLength(), rowSize2 + 8);
             // bloom filter
-            IndexHelper.defreezeBloomFilter(in1);
-            IndexHelper.defreezeBloomFilter(in2);
+            IndexHelper.defreezeBloomFilter(in1, false);
+            IndexHelper.defreezeBloomFilter(in2, false);
             // index
             int indexSize1 = in1.readInt();
             int indexSize2 = in2.readInt();

Modified: 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/BloomFilterTest.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/BloomFilterTest.java?rev=1052542&r1=1052541&r2=1052542&view=diff
==============================================================================
--- 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/BloomFilterTest.java
 (original)
+++ 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/BloomFilterTest.java
 Fri Dec 24 17:58:22 2010
@@ -20,6 +20,13 @@ package org.apache.cassandra.utils;
 
 import java.io.IOException;
 import java.nio.ByteBuffer;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+
+import org.apache.cassandra.io.util.DataOutputBuffer;
 
 import org.junit.Before;
 import org.junit.Test;
@@ -30,16 +37,31 @@ public class BloomFilterTest
 
     public BloomFilterTest()
     {
-        bf = BloomFilter.getFilter(FilterTest.ELEMENTS, 
FilterTest.MAX_FAILURE_RATE);
+        bf = BloomFilter.getFilter(10000L, FilterTestHelper.MAX_FAILURE_RATE);
+    }
+
+    public static BloomFilter testSerialize(BloomFilter f) throws IOException
+    {
+        f.add(ByteBufferUtil.bytes("a"));
+        DataOutputBuffer out = new DataOutputBuffer();
+        f.serializer().serialize(f, out);
+
+        ByteArrayInputStream in = new ByteArrayInputStream(out.getData(), 0, 
out.getLength());
+        BloomFilter f2 = f.serializer().deserialize(new DataInputStream(in));
+
+        assert f2.isPresent(ByteBufferUtil.bytes("a"));
+        assert !f2.isPresent(ByteBufferUtil.bytes("b"));
+        return f2;
     }
 
+
     @Before
     public void clear()
     {
         bf.clear();
     }
 
-    @Test(expected=UnsupportedOperationException.class)
+    @Test(expected = UnsupportedOperationException.class)
     public void testBloomLimits1()
     {
         int maxBuckets = BloomCalculations.probs.length - 1;
@@ -63,13 +85,13 @@ public class BloomFilterTest
     @Test
     public void testFalsePositivesInt()
     {
-        FilterTest.testFalsePositives(bf, FilterTest.intKeys(), 
FilterTest.randomKeys2());
+        FilterTestHelper.testFalsePositives(bf, FilterTestHelper.intKeys(), 
FilterTestHelper.randomKeys2());
     }
 
     @Test
     public void testFalsePositivesRandom()
     {
-        FilterTest.testFalsePositives(bf, FilterTest.randomKeys(), 
FilterTest.randomKeys2());
+        FilterTestHelper.testFalsePositives(bf, FilterTestHelper.randomKeys(), 
FilterTestHelper.randomKeys2());
     }
 
     @Test
@@ -79,16 +101,40 @@ public class BloomFilterTest
         {
             return;
         }
-        BloomFilter bf2 = 
BloomFilter.getFilter(KeyGenerator.WordGenerator.WORDS / 2, 
FilterTest.MAX_FAILURE_RATE);
+        BloomFilter bf2 = 
BloomFilter.getFilter(KeyGenerator.WordGenerator.WORDS / 2, 
FilterTestHelper.MAX_FAILURE_RATE);
         int skipEven = KeyGenerator.WordGenerator.WORDS % 2 == 0 ? 0 : 2;
-        FilterTest.testFalsePositives(bf2,
-                                      new KeyGenerator.WordGenerator(skipEven, 
2),
-                                      new KeyGenerator.WordGenerator(1, 2));
+        FilterTestHelper.testFalsePositives(bf2,
+                                            new 
KeyGenerator.WordGenerator(skipEven, 2),
+                                            new KeyGenerator.WordGenerator(1, 
2));
     }
 
     @Test
     public void testSerialize() throws IOException
     {
-        FilterTest.testSerialize(bf);
+        BloomFilterTest.testSerialize(bf);
+    }
+
+    public void testManyHashes(Iterator<ByteBuffer> keys)
+    {
+        int MAX_HASH_COUNT = 128;
+        Set<Long> hashes = new HashSet<Long>();
+        long collisions = 0;
+        while (keys.hasNext())
+        {
+            hashes.clear();
+            ByteBuffer buf = keys.next();
+            for (long hashIndex : BloomFilter.getHashBuckets(buf, 
MAX_HASH_COUNT, 1024 * 1024))
+            {
+                hashes.add(hashIndex);
+            }
+            collisions += (MAX_HASH_COUNT - hashes.size());
+        }
+        assert collisions <= 100;
+    }
+
+    @Test
+    public void testManyRandom()
+    {
+        testManyHashes(FilterTestHelper.randomKeys());
     }
 }

Added: 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/FilterTestHelper.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/FilterTestHelper.java?rev=1052542&view=auto
==============================================================================
--- 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/FilterTestHelper.java
 (added)
+++ 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/FilterTestHelper.java
 Fri Dec 24 17:58:22 2010
@@ -0,0 +1,82 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+package org.apache.cassandra.utils;
+
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import org.apache.cassandra.io.util.DataOutputBuffer;
+import org.junit.Test;
+
+public class FilterTestHelper
+{
+    // used by filter subclass tests
+
+    static final double MAX_FAILURE_RATE = 0.1;
+    public static final BloomCalculations.BloomSpecification spec = 
BloomCalculations.computeBloomSpec(15, MAX_FAILURE_RATE);
+    static final int ELEMENTS = 10000;
+
+    static final ResetableIterator<ByteBuffer> intKeys()
+    {
+        return new KeyGenerator.IntGenerator(ELEMENTS);
+    }
+
+    static final ResetableIterator<ByteBuffer> randomKeys()
+    {
+        return new KeyGenerator.RandomStringGenerator(314159, ELEMENTS);
+    }
+
+    static final ResetableIterator<ByteBuffer> randomKeys2()
+    {
+        return new KeyGenerator.RandomStringGenerator(271828, ELEMENTS);
+    }
+
+    public static void testFalsePositives(Filter f, 
ResetableIterator<ByteBuffer> keys, ResetableIterator<ByteBuffer> otherkeys)
+    {
+        assert keys.size() == otherkeys.size();
+
+        while (keys.hasNext())
+        {
+            f.add(keys.next());
+        }
+
+        int fp = 0;
+        while (otherkeys.hasNext())
+        {
+            if (f.isPresent(otherkeys.next()))
+            {
+                fp++;
+            }
+        }
+
+        double fp_ratio = fp / (keys.size() * 
BloomCalculations.probs[spec.bucketsPerElement][spec.K]);
+        assert fp_ratio < 1.03 : fp_ratio;
+    }
+
+    public void testTrue()
+    {
+      assert true;
+    }
+
+}

Added: 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java
URL: 
http://svn.apache.org/viewvc/cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java?rev=1052542&view=auto
==============================================================================
--- 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java
 (added)
+++ 
cassandra/branches/cassandra-0.7/test/unit/org/apache/cassandra/utils/LegacyBloomFilterTest.java
 Fri Dec 24 17:58:22 2010
@@ -0,0 +1,138 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*    http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+package org.apache.cassandra.utils;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+import java.io.ByteArrayInputStream;
+import java.io.DataInputStream;
+import org.apache.cassandra.io.util.DataOutputBuffer;
+
+import org.junit.Before;
+import org.junit.Test;
+
+public class LegacyBloomFilterTest
+{
+    public LegacyBloomFilter bf;
+
+    public LegacyBloomFilterTest()
+    {
+        bf = LegacyBloomFilter.getFilter(FilterTestHelper.ELEMENTS, 
FilterTestHelper.MAX_FAILURE_RATE);
+    }
+
+    public static Filter testSerialize(LegacyBloomFilter f) throws IOException
+    {
+        f.add(ByteBufferUtil.bytes("a"));
+        DataOutputBuffer out = new DataOutputBuffer();
+        f.serializer().serialize(f, out);
+
+        ByteArrayInputStream in = new ByteArrayInputStream(out.getData(), 0, 
out.getLength());
+        LegacyBloomFilter f2 = f.serializer().deserialize(new 
DataInputStream(in));
+
+        assert f2.isPresent(ByteBufferUtil.bytes("a"));
+        assert !f2.isPresent(ByteBufferUtil.bytes("b"));
+        return f2;
+    }
+
+
+    @Before
+    public void clear()
+    {
+        bf.clear();
+    }
+
+    @Test(expected=UnsupportedOperationException.class)
+    public void testBloomLimits1()
+    {
+        int maxBuckets = BloomCalculations.probs.length - 1;
+        int maxK = BloomCalculations.probs[maxBuckets].length - 1;
+
+        // possible
+        BloomCalculations.computeBloomSpec(maxBuckets, 
BloomCalculations.probs[maxBuckets][maxK]);
+
+        // impossible, throws
+        BloomCalculations.computeBloomSpec(maxBuckets, 
BloomCalculations.probs[maxBuckets][maxK] / 2);
+    }
+
+    @Test
+    public void testOne()
+    {
+        bf.add(ByteBufferUtil.bytes("a"));
+        assert bf.isPresent(ByteBufferUtil.bytes("a"));
+        assert !bf.isPresent(ByteBufferUtil.bytes("b"));
+    }
+
+    @Test
+    public void testFalsePositivesInt()
+    {
+        FilterTestHelper.testFalsePositives(bf, FilterTestHelper.intKeys(), 
FilterTestHelper.randomKeys2());
+    }
+
+    @Test
+    public void testFalsePositivesRandom()
+    {
+        FilterTestHelper.testFalsePositives(bf, FilterTestHelper.randomKeys(), 
FilterTestHelper.randomKeys2());
+    }
+
+    @Test
+    public void testWords()
+    {
+        if (KeyGenerator.WordGenerator.WORDS == 0)
+        {
+            return;
+        }
+        LegacyBloomFilter bf2 = 
LegacyBloomFilter.getFilter(KeyGenerator.WordGenerator.WORDS / 2, 
FilterTestHelper.MAX_FAILURE_RATE);
+        int skipEven = KeyGenerator.WordGenerator.WORDS % 2 == 0 ? 0 : 2;
+        FilterTestHelper.testFalsePositives(bf2,
+                                      new KeyGenerator.WordGenerator(skipEven, 
2),
+                                      new KeyGenerator.WordGenerator(1, 2));
+    }
+
+    @Test
+    public void testSerialize() throws IOException
+    {
+        LegacyBloomFilterTest.testSerialize(bf);
+    }
+
+    public void testManyHashes(Iterator<ByteBuffer> keys)
+    {
+        int MAX_HASH_COUNT = 128;
+        Set<Integer> hashes = new HashSet<Integer>();
+        int collisions = 0;
+        while (keys.hasNext())
+        {
+            hashes.clear();
+            for (int hashIndex : LegacyBloomFilter.getHashBuckets(keys.next(), 
MAX_HASH_COUNT, 1024 * 1024))
+            {
+                hashes.add(hashIndex);
+            }
+            collisions += (MAX_HASH_COUNT - hashes.size());
+        }
+        assert collisions <= 100;
+    }
+
+    @Test
+    public void testManyRandom()
+    {
+        testManyHashes(FilterTestHelper.randomKeys());
+    }
+}


Reply via email to