This is an automated email from the ASF dual-hosted git repository. jmalkin pushed a commit to branch common_bitarray in repository https://gitbox.apache.org/repos/asf/datasketches-java.git
commit 158b91b92a647581881bc8d453c2370aafe3fd7a Author: jmalkin <[email protected]> AuthorDate: Thu May 30 22:26:35 2024 -0700 move BloomFilter's underlying BitArray into common directory, update its API so QuotientFilter can use it --- .../datasketches/filters/bloomfilter/BitArray.java | 135 --------- .../filters/bloomfilter/BloomFilter.java | 3 + .../datasketches/filters/common/BitArray.java | 302 +++++++++++++++++++ .../{bloomfilter => common}/DirectBitArray.java | 98 ++++-- .../{bloomfilter => common}/DirectBitArrayR.java | 86 +++++- .../{bloomfilter => common}/HeapBitArray.java | 128 ++++++-- .../filters/quotientfilter/Bitmap.java | 35 --- .../filters/quotientfilter/QuickBitVector.java | 328 --------------------- .../quotientfilter/QuickBitVectorWrapper.java | 62 ---- .../filters/quotientfilter/QuotientFilter.java | 36 ++- .../quotientfilter/QuotientFilterBuilder.java | 2 +- .../DirectBitArrayRTest.java | 26 +- .../DirectBitArrayTest.java | 68 ++++- .../{bloomfilter => common}/HeapBitArrayTest.java | 55 +++- .../filters/quotientfilter/BitVectorTests.java | 82 ------ .../filters/quotientfilter/QuotientFilterTest.java | 45 +-- 16 files changed, 756 insertions(+), 735 deletions(-) diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BitArray.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BitArray.java deleted file mode 100644 index bfa696ca..00000000 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BitArray.java +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.bloomfilter; - -import static org.apache.datasketches.common.Util.LS; - -import org.apache.datasketches.common.SketchesArgumentException; -import org.apache.datasketches.memory.Buffer; -import org.apache.datasketches.memory.Memory; -import org.apache.datasketches.memory.WritableMemory; - -/** - * This class holds an array of bits suitable for use in a Bloom Filter - * - * <p>Rounds the number of bits up to the smallest multiple of 64 (one long) - * that is not smaller than the specified number. - */ -abstract class BitArray { - // MAX_BITS using longs, based on array indices being capped at Integer.MAX_VALUE - protected static final long MAX_BITS = Integer.MAX_VALUE * (long) Long.SIZE; - - protected BitArray() {} - - static BitArray heapify(final Buffer mem, final boolean isEmpty) { - return HeapBitArray.heapify(mem, isEmpty); - } - - static BitArray wrap(final Memory mem, final boolean isEmpty) { - return DirectBitArrayR.wrap(mem, isEmpty); - } - - static BitArray writableWrap(final WritableMemory wmem, final boolean isEmpty) { - return DirectBitArray.writableWrap(wmem, isEmpty); - } - - boolean isEmpty() { - return !isDirty() && getNumBitsSet() == 0; - } - - abstract boolean hasMemory(); - - abstract boolean isDirect(); - - abstract boolean isReadOnly(); - - abstract boolean getBit(final long index); - - abstract boolean getAndSetBit(final long index); - - abstract void setBit(final long index); - - abstract long getNumBitsSet(); - - abstract void reset(); - - abstract long getCapacity(); - - abstract int getArrayLength(); - - abstract void union(final BitArray other); - - abstract void intersect(final BitArray other); - - abstract void invert(); - - // prints the raw BitArray as 0s and 1s, one long per row - @Override - public String toString() { - final StringBuilder sb = new StringBuilder(); - for (int i = 0; i < getArrayLength(); ++i) { - sb.append(i + ": ") - .append(printLong(getLong(i))) - .append(LS); - } - return sb.toString(); - } - - long getSerializedSizeBytes() { - // We only really need an int for array length but this will keep everything - // aligned to 8 bytes. - // Always write array length, but write numBitsSet only if empty - return Long.BYTES * (isEmpty() ? 1L : (2L + getArrayLength())); - } - - // returns the number of bytes needed for a non-empty BitArray of the requested size - static long getSerializedSizeBytes(final long numBits) { - if (numBits <= 0) { - throw new SketchesArgumentException("Requested number of bits must be strictly positive"); - } - if (numBits > MAX_BITS) { - throw new SketchesArgumentException("Requested number of bits exceeds maximum allowed. " - + "Requested: " + numBits + ", maximum: " + MAX_BITS); - } - final int numLongs = (int) Math.ceil(numBits / 64.0); - return Long.BYTES * (numLongs + 2L); - } - - abstract protected boolean isDirty(); - - // used to get a long from the array regardless of underlying storage - // NOT used to query individual bits - abstract protected long getLong(final int arrayIndex); - - // used to set a long in the array regardless of underlying storage - // NOT used to set individual bits - abstract protected void setLong(final int arrayIndex, final long value); - - // prints a long as a series of 0s and 1s as little endian - protected static String printLong(final long val) { - final StringBuilder sb = new StringBuilder(); - for (int j = 0; j < Long.SIZE; ++j) { - sb.append((val & (1L << j)) != 0 ? "1" : "0"); - if (j % 8 == 7) { sb.append(" "); } - } - return sb.toString(); - } - -} diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java index 3ea73b9b..10829d7b 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java +++ b/src/main/java/org/apache/datasketches/filters/bloomfilter/BloomFilter.java @@ -26,6 +26,9 @@ import java.nio.charset.StandardCharsets; import org.apache.datasketches.common.Family; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesStateException; +import org.apache.datasketches.filters.common.BitArray; +import org.apache.datasketches.filters.common.DirectBitArray; +import org.apache.datasketches.filters.common.HeapBitArray; import org.apache.datasketches.memory.Buffer; import org.apache.datasketches.memory.Memory; import org.apache.datasketches.memory.WritableBuffer; diff --git a/src/main/java/org/apache/datasketches/filters/common/BitArray.java b/src/main/java/org/apache/datasketches/filters/common/BitArray.java new file mode 100644 index 00000000..2fd2a49e --- /dev/null +++ b/src/main/java/org/apache/datasketches/filters/common/BitArray.java @@ -0,0 +1,302 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.datasketches.filters.common; + +import static org.apache.datasketches.common.Util.LS; + +import org.apache.datasketches.common.SketchesArgumentException; +import org.apache.datasketches.memory.Buffer; +import org.apache.datasketches.memory.Memory; +import org.apache.datasketches.memory.WritableMemory; + +/** + * This class holds an array of bits suitable for use in a Bloom Filter + * + * <p>Rounds the number of bits up to the smallest multiple of 64 (one long) + * that is not smaller than the specified number. + */ +public abstract class BitArray { + + /** + * The maximum number of bits that can be represented using longs, + * based on array indices being capped at Integer.MAX_VALUE + * and allowing room for encoding both the size and the number of bits set. + */ + protected static final long MAX_BITS = (Integer.MAX_VALUE - 1) * (long) Long.SIZE; + + /** + * Constructs a new BitArray. + */ + BitArray() {} + + /** + * Creates a BitArray from a given Buffer. + * + * @param mem The Buffer to heapify. + * @param isEmpty Indicates whether the BitArray is empty. + * @return The heapified BitArray. + */ + public static BitArray heapify(final Buffer mem, final boolean isEmpty) { + return HeapBitArray.heapify(mem, isEmpty); + } + + /** + * Creates a BitArray from a given Memory. + * + * @param mem The Memory to wrap. + * @param isEmpty Indicates whether the BitArray is empty. + * @return The wrapped BitArray. + */ + public static BitArray wrap(final Memory mem, final boolean isEmpty) { + return DirectBitArrayR.wrap(mem, isEmpty); + } + + /** + * Creates a writable BitArray from a given WritableMemory. + * + * @param wmem The WritableMemory to wrap. + * @param isEmpty Indicates whether the BitArray is empty. + * @return The writable wrapped BitArray. + */ + public static BitArray writableWrap(final WritableMemory wmem, final boolean isEmpty) { + return DirectBitArray.writableWrap(wmem, isEmpty); + } + + /** + * Checks if the BitArray is empty. + * + * @return True if the BitArray is empty, false otherwise. + */ + public boolean isEmpty() { + return !isDirty() && getNumBitsSet() == 0; + } + + /** + * Checks if the BitArray has a backing Memory. + * + * @return True if the BitArray has a backing Memory, false otherwise. + */ + public abstract boolean hasMemory(); + + /** + * Checks if the BitArray is direct. + * + * @return True if the BitArray is direct, false otherwise. + */ + public abstract boolean isDirect(); + + /** + * Checks if the BitArray is read-only. + * + * @return True if the BitArray is read-only, false otherwise. + */ + public abstract boolean isReadOnly(); + + /** + * Gets the value of a bit at the specified index. + * + * @param index The index of the bit. + * @return The value of the bit at the specified index. + */ + public abstract boolean getBit(final long index); + + /** + * Gets the a specified number of bits starting at the given index. Limited + * to a single long (64 bits). + * + * @param index The starting index. + * @param numBits The number of bits to return. + * @return The value of the requested bits, starting at bit 0 of the result. + */ + public abstract long getBits(final long index, final int numBits); + + /** + * Gets the value of a bit at the specified index and sets it to true. + * + * @param index The index of the bit. + * @return The previous value of the bit at the specified index. + */ + public abstract boolean getAndSetBit(final long index); + + /** + * Assigns the value of a bit at the specified index to true. + * + * @param index The index of the bit. + */ + public abstract void setBit(final long index); + + /** + * Assigns the value of a bit at the specified index to false. + * + * @param index The index of the bit. + */ + public abstract void clearBit(final long index); + + /** + * Assigns the given value of a bit at the specified index. + * + * @param index The index of the bit. + * @param value The value to set the bit to. + */ + public abstract void assignBit(final long index, final boolean value); + + /** + /** + * Sets {@code numBits} starting from {@code index} to the specified value. + * Limited to a single long (64 bits). + * + * @param index the starting index of the range (inclusive) + * @param numBits the number of bits to write + * @param bits the value to set the bits to, starting with bit 0 + */ + public abstract void setBits(final long index, final int numBits, final long bits); + + /** + * Gets the number of bits that are set to true in the BitArray. + * + * @return The number of bits set to true. + */ + public abstract long getNumBitsSet(); + + /** + * Resets the BitArray, setting all bits to false. + */ + public abstract void reset(); + + /** + * Gets the capacity of the BitArray in bits. + * + * @return The capacity of the BitArray in bits + */ + public abstract long getCapacity(); + + /** + * Gets the length of the underlying array in longs. + * + * @return The length of the underlying array in longs. + */ + public abstract int getArrayLength(); + + /** + * Performs a union operation with another BitArray. + * + * @param other The other BitArray to perform the union with. + */ + public abstract void union(final BitArray other); + + /** + * Performs an intersection operation with another BitArray. + * + * @param other The other BitArray to perform the intersection with. + */ + public abstract void intersect(final BitArray other); + + /** + * Inverts the BitArray, flipping all bits. + */ + public abstract void invert(); + + /** + * Returns a string representation of the BitArray. + * + * @return A string representation of the BitArray. + */ + @Override + public String toString() { + final StringBuilder sb = new StringBuilder(); + for (int i = 0; i < getArrayLength(); ++i) { + sb.append(i + ": ") + .append(printLong(getLong(i))) + .append(LS); + } + return sb.toString(); + } + + /** + * Gets the serialized size of the BitArray in bytes. + * + * @return The serialized size of the BitArray in bytes. + */ + public long getSerializedSizeBytes() { + // We only really need an int for array length but this will keep everything + // aligned to 8 bytes. + // Always write array length, but write numBitsSet only if empty + return Long.BYTES * (isEmpty() ? 1L : (2L + getArrayLength())); + } + + /** + * Gets the serialized size of a non-empty BitArray of the specified size in bytes. + * + * @param numBits The number of bits in the BitArray. + * @return The serialized size of the BitArray in bytes. + * @throws SketchesArgumentException If the requested number of bits is not strictly positive + * or exceeds the maximum allowed. + */ + public static long getSerializedSizeBytes(final long numBits) { + if (numBits <= 0) { + throw new SketchesArgumentException("Requested number of bits must be strictly positive"); + } + if (numBits > MAX_BITS) { + throw new SketchesArgumentException("Requested number of bits exceeds maximum allowed. " + + "Requested: " + numBits + ", maximum: " + MAX_BITS); + } + final int numLongs = (int) Math.ceil(numBits / 64.0); + return Long.BYTES * (numLongs + 2L); + } + + /** + * Checks if the BitArray has changes not reflected in state variables. + * + * @return True if the BitArray is dirty, false otherwise. + */ + abstract boolean isDirty(); + + /** + * Gets the long value at the specified array index. + * + * @param arrayIndex The index of the long value in the array. + * @return The long value at the specified array index. + */ + abstract long getLong(final int arrayIndex); + + /** + * Sets the long value at the specified array index. + * + * @param arrayIndex The index of the long value in the array. + * @param value The value to set the long to. + */ + abstract void setLong(final int arrayIndex, final long value); + + /** + * Returns a string representation of a long value as a series of 0s and 1s (little endian). + * + * @param val The long value to print. + * @return A string representation of the long value. + */ + public static String printLong(final long val) { + final StringBuilder sb = new StringBuilder(); + for (int j = 0; j < Long.SIZE; ++j) { + sb.append((val & (1L << j)) != 0 ? "1" : "0"); + if (j % 8 == 7) { sb.append(" "); } + } + return sb.toString(); + } + +} diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArray.java b/src/main/java/org/apache/datasketches/filters/common/DirectBitArray.java similarity index 62% rename from src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArray.java rename to src/main/java/org/apache/datasketches/filters/common/DirectBitArray.java index 77c24f02..ac1d6eaf 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArray.java +++ b/src/main/java/org/apache/datasketches/filters/common/DirectBitArray.java @@ -17,21 +17,21 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.memory.WritableMemory; -final class DirectBitArray extends DirectBitArrayR { +public final class DirectBitArray extends DirectBitArrayR { - DirectBitArray(final int dataLength, final long storedNumBitsSet, final WritableMemory wmem) { + public DirectBitArray(final int dataLength, final long storedNumBitsSet, final WritableMemory wmem) { super(dataLength, 0, wmem); // we'll set numBitsSet_ ourselves so pass 0 // can recompute later if needed numBitsSet_ = storedNumBitsSet; } - DirectBitArray(final int dataLength, final WritableMemory wmem) { + public DirectBitArray(final int dataLength, final WritableMemory wmem) { super(dataLength, 0, wmem); wmem_.putInt(0, dataLength_); @@ -39,7 +39,7 @@ final class DirectBitArray extends DirectBitArrayR { wmem_.clear(DATA_OFFSET, (long) dataLength_ * Long.BYTES); } - static DirectBitArray initialize(final long numBits, final WritableMemory wmem) { + public static DirectBitArray initialize(final long numBits, final WritableMemory wmem) { if (numBits <= 0) { throw new SketchesArgumentException("Number of bits must be strictly positive. Found: " + numBits); } @@ -58,7 +58,7 @@ final class DirectBitArray extends DirectBitArrayR { return new DirectBitArray(arrayLength, wmem); } - static DirectBitArray writableWrap(final WritableMemory mem, final boolean isEmpty) { + public static DirectBitArray writableWrap(final WritableMemory mem, final boolean isEmpty) { final int arrayLength = mem.getInt(0); final long storedNumBitsSet = isEmpty ? 0L : mem.getLong(NUM_BITS_OFFSET); @@ -81,7 +81,7 @@ final class DirectBitArray extends DirectBitArrayR { } @Override - long getNumBitsSet() { + public long getNumBitsSet() { // update numBitsSet and store in array if (isDirty()) { numBitsSet_ = 0; @@ -95,17 +95,17 @@ final class DirectBitArray extends DirectBitArrayR { } @Override - protected boolean isDirty() { + public boolean isDirty() { return numBitsSet_ == -1; } @Override - boolean getBit(final long index) { + public boolean getBit(final long index) { return (wmem_.getByte(DATA_OFFSET + ((int) index >>> 3)) & (1 << (index & 0x7))) != 0; } @Override - protected long getLong(final int arrayIndex) { + public long getLong(final int arrayIndex) { return wmem_.getLong(DATA_OFFSET + (arrayIndex << 3)); } @@ -115,21 +115,83 @@ final class DirectBitArray extends DirectBitArrayR { } @Override - void reset() { + public void reset() { setNumBitsSet(0); wmem_.clear(DATA_OFFSET, (long) dataLength_ * Long.BYTES); } @Override - void setBit(final long index) { + public void setBit(final long index) { final long memoryOffset = DATA_OFFSET + ((int) index >>> 3); final byte val = wmem_.getByte(memoryOffset); - wmem_.setBits(memoryOffset, (byte) (val | (1 << (index & 0x07)))); + wmem_.putByte(memoryOffset, (byte) (val | (1 << (index & 0x07)))); setNumBitsSet(-1); // mark dirty } @Override - boolean getAndSetBit(final long index) { + public void clearBit(final long index) { + final long memoryOffset = DATA_OFFSET + ((int) index >>> 3); + final byte val = wmem_.getByte(memoryOffset); + wmem_.putByte(memoryOffset, (byte) (val & ~(1 << (index & 0x07)))); + setNumBitsSet(-1); // mark dirty + } + + @Override + public void assignBit(final long index, final boolean value) { + if (value) { + setBit(index); + } else { + clearBit(index); + } + } + + @Override + public void setBits(final long index, final int numBits, final long bits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 (inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + + // TODO: since Memory provides byte offsets even when reading a long, we can be sure + // that the result always fits in a single long. We can potentially optimize this, but + // need to handle cases where a long would read beyond the end of the Memory. + + final long endBit = index + numBits - 1; + + // these are indices into a long[] array, need to adjust to byte offsets + // when calling wmem_.getLong() + final int fromIndex = (int) index >>> 6; + final int toIndex = (int) endBit >>> 6; + + setNumBitsSet(-1); // mark dirty + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? -1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + final long mask = toMask - fromMask; + final long maskedVal = wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & ~mask; + wmem_.putLong(DATA_OFFSET + (fromIndex << 3), maskedVal | ((bits << fromOffset) & mask)); + return; + } + + // spans longs, need to set bits in two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = -1L - ((1L << fromOffset) - 1); + final long toMask = (1L << (toOffset + 1)) - 1; + + final long maskedFromVal = wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & ~fromMask; + final long maskedToVal = wmem_.getLong(DATA_OFFSET + (toIndex << 3)) & ~toMask; + + wmem_.putLong(DATA_OFFSET + (fromIndex << 3), maskedFromVal | ((bits << fromOffset) & fromMask)); + wmem_.putLong(DATA_OFFSET + (toIndex << 3), maskedToVal | ((bits >>> splitBit) & toMask)); + } + + @Override + public boolean getAndSetBit(final long index) { final long memoryOffset = DATA_OFFSET + ((int) index >>> 3); final byte mask = (byte) (1 << (index & 0x07)); final byte val = wmem_.getByte(memoryOffset); @@ -143,7 +205,7 @@ final class DirectBitArray extends DirectBitArrayR { } @Override - void intersect(final BitArray other) { + public void intersect(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot intersect bit arrays with unequal lengths"); } @@ -158,7 +220,7 @@ final class DirectBitArray extends DirectBitArrayR { } @Override - void union(final BitArray other) { + public void union(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot intersect bit arrays with unequal lengths"); } @@ -173,7 +235,7 @@ final class DirectBitArray extends DirectBitArrayR { } @Override - void invert() { + public void invert() { if (isDirty()) { numBitsSet_ = 0; for (int i = 0; i < dataLength_; ++i) { @@ -191,7 +253,7 @@ final class DirectBitArray extends DirectBitArrayR { } @Override - protected void setLong(final int arrayIndex, final long value) { + void setLong(final int arrayIndex, final long value) { wmem_.putLong(DATA_OFFSET + (arrayIndex << 3), value); } diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java b/src/main/java/org/apache/datasketches/filters/common/DirectBitArrayR.java similarity index 58% rename from src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java rename to src/main/java/org/apache/datasketches/filters/common/DirectBitArrayR.java index 8acc36be..e446ae6e 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayR.java +++ b/src/main/java/org/apache/datasketches/filters/common/DirectBitArrayR.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import org.apache.datasketches.common.SketchesArgumentException; import org.apache.datasketches.common.SketchesReadOnlyException; @@ -35,7 +35,7 @@ public class DirectBitArrayR extends BitArray { final protected WritableMemory wmem_; // for inheritance; we won't write to it protected long numBitsSet_; // could be final here but writable direct will update it - protected DirectBitArrayR(final int dataLength, final long storedNumBitsSet, final Memory mem) { + public DirectBitArrayR(final int dataLength, final long storedNumBitsSet, final Memory mem) { super(); dataLength_ = dataLength; @@ -53,7 +53,7 @@ public class DirectBitArrayR extends BitArray { // assumes we have a region with only the portion of Memory // the BitArray cares about - static DirectBitArrayR wrap(final Memory mem, final boolean isEmpty) { + public static DirectBitArrayR wrap(final Memory mem, final boolean isEmpty) { final int arrayLength = mem.getInt(0); final long storedNumBitsSet = isEmpty ? 0L : mem.getLong(NUM_BITS_OFFSET); @@ -71,34 +71,73 @@ public class DirectBitArrayR extends BitArray { } @Override - long getCapacity() { + public long getCapacity() { return (long) dataLength_ * Long.SIZE; } @Override - long getNumBitsSet() { + public long getNumBitsSet() { return numBitsSet_; } @Override - protected boolean isDirty() { + public boolean isDirty() { // read-only so necessarily false return false; } @Override - int getArrayLength() { + public int getArrayLength() { return dataLength_; } @Override - boolean getBit(final long index) { + public boolean getBit(final long index) { if (isEmpty()) { return false; } return (wmem_.getByte(DATA_OFFSET + ((int) index >>> 3)) & (1 << (index & 0x7))) != 0; } @Override - protected long getLong(final int arrayIndex) { + public long getBits(final long index, final int numBits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 (inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + if (isEmpty()) { return 0L; } + + // TODO: since Memory provides byte offsets even when reading a long, we can be sure + // that the result always fits in a single long. We can potentially optimize this, but + // need to handle cases where a long would read beyond the end of the Memory. + + final long endBit = index + numBits - 1; + + // these are indices into a long[] array, need to adjust to byte offsets + // when calling wmem_.getLong() + final int fromIndex = (int) index >>> 6; + final int toIndex = (int) endBit >>> 6; + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? -1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + return (wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & (toMask - fromMask)) >>> fromOffset; + } + + // spans longs, need to combine bits from two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = -1L - ((1L << fromOffset) - 1); + final long toMask = (1L << (toOffset + 1)) - 1; + + long result = (wmem_.getLong(DATA_OFFSET + (fromIndex << 3)) & fromMask) >>> fromOffset; + result |= (wmem_.getLong(DATA_OFFSET + (toIndex << 3)) & toMask) << splitBit; + return result; + } + + @Override + long getLong(final int arrayIndex) { if (isEmpty()) { return 0L; } return wmem_.getLong(DATA_OFFSET + (arrayIndex << 3)); } @@ -119,37 +158,52 @@ public class DirectBitArrayR extends BitArray { } @Override - void reset() { + public void reset() { throw new SketchesReadOnlyException("Attempt to call reset() on read-only memory"); } @Override - void setBit(final long index) { + public void setBit(final long index) { + throw new SketchesReadOnlyException("Attempt to call setBit() on read-only memory"); + } + + @Override + public void clearBit(final long index) { + throw new SketchesReadOnlyException("Attempt to call clearBit() on read-only memory"); + } + + @Override + public void setBits(final long index, final int numBits, final long bits) { + throw new SketchesReadOnlyException("Attempt to call setBits() on read-only memory"); + } + + @Override + public void assignBit(final long index, final boolean value) { throw new SketchesReadOnlyException("Attempt to call setBit() on read-only memory"); } @Override - boolean getAndSetBit(final long index) { + public boolean getAndSetBit(final long index) { throw new SketchesReadOnlyException("Attempt to call getAndSetBit() on read-only memory"); } @Override - void intersect(final BitArray other) { + public void intersect(final BitArray other) { throw new SketchesReadOnlyException("Attempt to call intersect() on read-only memory"); } @Override - void union(final BitArray other) { + public void union(final BitArray other) { throw new SketchesReadOnlyException("Attempt to call union() on read-only memory"); } @Override - void invert() { + public void invert() { throw new SketchesReadOnlyException("Attempt to call invert() on read-only memory"); } @Override - protected void setLong(final int arrayIndex, final long value) { + void setLong(final int arrayIndex, final long value) { throw new SketchesReadOnlyException("Attempt to call setLong() on read-only memory"); } } diff --git a/src/main/java/org/apache/datasketches/filters/bloomfilter/HeapBitArray.java b/src/main/java/org/apache/datasketches/filters/common/HeapBitArray.java similarity index 57% rename from src/main/java/org/apache/datasketches/filters/bloomfilter/HeapBitArray.java rename to src/main/java/org/apache/datasketches/filters/common/HeapBitArray.java index 4048b677..184cc83c 100644 --- a/src/main/java/org/apache/datasketches/filters/bloomfilter/HeapBitArray.java +++ b/src/main/java/org/apache/datasketches/filters/common/HeapBitArray.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import java.util.Arrays; @@ -31,13 +31,13 @@ import org.apache.datasketches.memory.WritableBuffer; * <p>Rounds the number of bits up to the smallest multiple of 64 (one long) * that is not smaller than the specified number. */ -final class HeapBitArray extends BitArray { +public final class HeapBitArray extends BitArray { private long numBitsSet_; // if -1, need to recompute value private boolean isDirty_; final private long[] data_; // creates an array of a given size - HeapBitArray(final long numBits) { + public HeapBitArray(final long numBits) { super(); if (numBits <= 0) { @@ -54,7 +54,7 @@ final class HeapBitArray extends BitArray { } // uses the provided array - HeapBitArray(final long numBitsSet, final long[] data) { + public HeapBitArray(final long numBitsSet, final long[] data) { super(); data_ = data; @@ -64,7 +64,7 @@ final class HeapBitArray extends BitArray { // reads a serialized image, but the BitArray is not fully self-describing so requires // a flag to indicate whether the array is empty - static HeapBitArray heapify(final Buffer buffer, final boolean isEmpty) { + public static HeapBitArray heapify(final Buffer buffer, final boolean isEmpty) { final int numLongs = buffer.getInt(); if (numLongs < 0) { throw new SketchesArgumentException("Possible corruption: Must have strictly positive array size. Found: " + numLongs); @@ -85,40 +85,124 @@ final class HeapBitArray extends BitArray { } @Override - protected boolean isDirty() { + public boolean isDirty() { return isDirty_; } @Override - boolean hasMemory() { + public boolean hasMemory() { return false; } @Override - boolean isDirect() { + public boolean isDirect() { return false; } @Override - boolean isReadOnly() { return false; } + public boolean isReadOnly() { return false; } // queries a single bit in the array @Override - boolean getBit(final long index) { + public boolean getBit(final long index) { return (data_[(int) index >>> 6] & (1L << index)) != 0 ? true : false; } + @Override + public long getBits(final long index, final int numBits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 (inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + if (numBits == 0) { return 0; } + + final long endBit = index + numBits - 1; + + final int fromIndex = (int) index >>> 6; + final int toIndex = (int) endBit >>> 6; + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? -1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + return (data_[fromIndex] & (toMask - fromMask)) >>> fromOffset; + } + + // spans longs, need to combine bits from two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = -1L - ((1L << fromOffset) - 1); + final long toMask = (1L << (toOffset + 1)) - 1; + + long result = (data_[fromIndex] & fromMask) >>> fromOffset; + result |= (data_[toIndex] & toMask) << splitBit; + return result; + } + // sets a single bit in the array without querying, meaning the method // cannot properly track the number of bits set so set isDirty = true @Override - void setBit(final long index) { + public void setBit(final long index) { data_[(int) index >>> 6] |= 1L << index; isDirty_ = true; } + @Override + public void clearBit(final long index) { + data_[(int) index >>> 6] &= ~(1L << index); + isDirty_ = true; + } + + // assigns a single bit in the array without querying + @Override + public void assignBit(final long index, final boolean value) { + if (value) { + setBit(index); + } else { + clearBit(index); + } + } + + @Override + public void setBits(final long index, final int numBits, final long bits) { + if (numBits < 0 || numBits > 64) { + throw new SketchesArgumentException("numBits must be between 0 and 64 (inclusive)"); + } else if (index + numBits > getCapacity()) { + throw new SketchesArgumentException("End of range exceeds capacity"); + } + if (numBits == 0) { return; } + + isDirty_ = true; + final long endBit = index + numBits - 1; + + final int fromIndex = (int) index >>> 6; + final int toIndex = (int) endBit >>> 6; + final long fromOffset = index & 0x3F; + final long toOffset = endBit & 0x3F; + + // within a single long + if (fromIndex == toIndex) { + final long toMask = (toOffset == 63) ? -1L : (1L << (toOffset + 1)) - 1L; + final long fromMask = (1L << fromOffset) - 1L; + final long mask = toMask - fromMask; + data_[fromIndex] = (data_[fromIndex] & ~mask) | ((bits << fromOffset) & mask); + return; + } + + // spans longs, need to set bits in two longs + final long splitBit = Long.SIZE - (fromOffset); + final long fromMask = -1L - ((1L << fromOffset) - 1); + final long toMask = (1L << (toOffset + 1)) - 1; + + data_[fromIndex] = (data_[fromIndex] & ~fromMask) | ((bits << fromOffset) & fromMask); + data_[toIndex] = (data_[toIndex] & ~toMask) | ((bits >>> splitBit) & toMask); + } + // returns existing value of bit @Override - boolean getAndSetBit(final long index) { + public boolean getAndSetBit(final long index) { final int offset = (int) index >>> 6; final long mask = 1L << index; if ((data_[offset] & mask) != 0) { @@ -134,7 +218,7 @@ final class HeapBitArray extends BitArray { // O(1) if only getAndSetBit() has been used // O(data_.length) if setBit() has ever been used @Override - long getNumBitsSet() { + public long getNumBitsSet() { if (isDirty_) { numBitsSet_ = 0; for (final long val : data_) { @@ -145,14 +229,14 @@ final class HeapBitArray extends BitArray { } @Override - long getCapacity() { return (long) data_.length * Long.SIZE; } + public long getCapacity() { return (long) data_.length * Long.SIZE; } @Override - int getArrayLength() { return data_.length; } + public int getArrayLength() { return data_.length; } // applies logical OR @Override - void union(final BitArray other) { + public void union(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot union bit arrays with unequal lengths"); } @@ -168,7 +252,7 @@ final class HeapBitArray extends BitArray { // applies logical AND @Override - void intersect(final BitArray other) { + public void intersect(final BitArray other) { if (getCapacity() != other.getCapacity()) { throw new SketchesArgumentException("Cannot intersect bit arrays with unequal lengths"); } @@ -184,7 +268,7 @@ final class HeapBitArray extends BitArray { // applies bitwise inversion @Override - void invert() { + public void invert() { if (isDirty_) { numBitsSet_ = 0; for (int i = 0; i < data_.length; ++i) { @@ -200,7 +284,7 @@ final class HeapBitArray extends BitArray { } } - void writeToBuffer(final WritableBuffer wbuf) { + public void writeToBuffer(final WritableBuffer wbuf) { wbuf.putInt(data_.length); wbuf.putInt(0); // unused @@ -211,18 +295,18 @@ final class HeapBitArray extends BitArray { } @Override - protected long getLong(final int arrayIndex) { + public long getLong(final int arrayIndex) { return data_[arrayIndex]; } @Override - protected void setLong(final int arrayIndex, final long value) { + public void setLong(final int arrayIndex, final long value) { data_[arrayIndex] = value; } // clears the array @Override - void reset() { + public void reset() { Arrays.fill(data_, 0); numBitsSet_ = 0; isDirty_ = false; diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/Bitmap.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/Bitmap.java deleted file mode 100644 index 658e15f0..00000000 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/Bitmap.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.quotientfilter; - -public abstract class Bitmap { - - public abstract long size(); - public abstract void set(long bit_index, boolean value); - public abstract void setFromTo(long from, long to, long value); - public abstract boolean get(long bit_index); - public abstract long getFromTo(long from, long to); - - public static boolean get_fingerprint_bit(long index, long fingerprint) { - long mask = 1 << index; - long and = fingerprint & mask; - return and != 0; - } -} diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVector.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVector.java deleted file mode 100644 index ca387ebc..00000000 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVector.java +++ /dev/null @@ -1,328 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.quotientfilter; - -/* -Copyright � 1999 CERN - European Organization for Nuclear Research. -Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose -is hereby granted without fee, provided that the above copyright notice appear in all copies and -that both that copyright notice and this permission notice appear in supporting documentation. -CERN makes no representations about the suitability of this software for any purpose. -It is provided "as is" without expressed or implied warranty. -*/ - -/** - * Implements quick non polymorphic non bounds checking low level bitvector operations. - * Includes some operations that interpret sub-bitstrings as long integers. - * <p> - * <b>WARNING: Methods of this class do not check preconditions.</b> - * Provided with invalid parameters these method may return (or set) invalid values without throwing any exception. - * <b>You should only use this class when performance is critical and you are absolutely sure that indexes are within bounds.</b> - * <p> - * A bitvector is modelled as a long array, i.e. long[] bits holds bits of a bitvector. - * Each long value holds 64 bits. - * The i-th bit is stored in bits[i/64] at - * bit position i % 64 (where bit position 0 refers to the least - * significant bit and 63 refers to the most significant bit). - * - * @author [email protected] - * @version 1.0, 09/24/99 - * @see java.util.BitSet - */ -//package bitmap_implementations; - -public class QuickBitVector extends Object { - protected final static int ADDRESS_BITS_PER_UNIT = 6; // 64=2^6 - protected final static int BITS_PER_UNIT = 64; // = 1 << ADDRESS_BITS_PER_UNIT - protected final static int BIT_INDEX_MASK = 63; // = BITS_PER_UNIT - 1; - - private static final long[] pows = precomputePows(); //precompute bitmasks for speed - /** - * Makes this class non instantiable, but still inheritable. - */ - protected QuickBitVector() { - } - /** - * Returns a bit mask with bits in the specified range set to 1, all the rest set to 0. - * In other words, returns a bit mask having 0,1,2,3,...,64 bits set. - * If to-from+1==0 then returns zero (0L). - * Precondition (not checked): to-from+1 ≥ 0 AND to-from+1 ≤ 64. - * - * @param from index of start bit (inclusive) - * @param to index of end bit (inclusive). - * @return the bit mask having all bits between from and to set to 1. - */ - public static final long bitMaskWithBitsSetFromTo(long from, long to) { - return pows[(int)(to-from+1)] << from; - - // This turned out to be slower: - // 0xffffffffffffffffL == ~0L == -1L == all 64 bits set. - // int width; - // return (width=to-from+1) == 0 ? 0L : (0xffffffffffffffffL >>> (BITS_PER_UNIT-width)) << from; - } - /** - * Changes the bit with index bitIndex in the bitvector bits to the "clear" (false) state. - * - * @param bits the bitvector. - * @param bitIndex the index of the bit to be cleared. - */ - public static void clear(long[] bits, long bitIndex) { - bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] &= ~(1L << (bitIndex & BIT_INDEX_MASK)); - } - /** - * Returns from the bitvector the value of the bit with the specified index. - * The value is true if the bit with the index bitIndex - * is currently set; otherwise, returns false. - * - * @param bits the bitvector. - * @param bitIndex the bit index. - * @return the value of the bit with the specified index. - */ - public static boolean get(long[] bits, long bitIndex) { - return ((bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] & (1L << (bitIndex & BIT_INDEX_MASK))) != 0); - } - /** - * Returns a long value representing bits of a bitvector from index from to index to. - * Bits are returned as a long value with the return value having bit 0 set to bit <code>from</code>, ..., bit <code>to-from</code> set to bit <code>to</code>. - * All other bits of return value are set to 0. - * If from > to then returns zero (0L). - * Precondition (not checked): to-from+1 ≤ 64. - * @param bits the bitvector. - * @param from index of start bit (inclusive). - * @param to index of end bit (inclusive). - * @return the specified bits as long value. - */ - public static long getLongFromTo(long[] bits, long from, long to) { - if (from>to) return 0L; - - final int fromIndex = (int)(from >> ADDRESS_BITS_PER_UNIT); //equivalent to from/64 - final int toIndex = (int)(to >> ADDRESS_BITS_PER_UNIT); - final int fromOffset = (int)(from & BIT_INDEX_MASK); //equivalent to from%64 - final int toOffset = (int)(to & BIT_INDEX_MASK); - //this is equivalent to the above, but slower: - //final int fromIndex=from/BITS_PER_UNIT; - //final int toIndex=to/BITS_PER_UNIT; - //final int fromOffset=from%BITS_PER_UNIT; - //final int toOffset=to%BITS_PER_UNIT; - - - long mask; - if (fromIndex==toIndex) { //range does not cross unit boundaries; value to retrieve is contained in one single long value. - mask=bitMaskWithBitsSetFromTo(fromOffset, toOffset); - return (bits[fromIndex] & mask) >>> fromOffset; - - } - - //range crosses unit boundaries; value to retrieve is spread over two long values. - //get part from first long value - mask=bitMaskWithBitsSetFromTo(fromOffset, BIT_INDEX_MASK); - final long x1=(bits[fromIndex] & mask) >>> fromOffset; - - //get part from second long value - mask=bitMaskWithBitsSetFromTo(0, toOffset); - final long x2=(bits[toIndex] & mask) << (BITS_PER_UNIT-fromOffset); - - //combine - return x1|x2; - } - - /** - * Returns the index of the least significant bit in state "true". - * Returns 32 if no bit is in state "true". - * - * Examples: - * <pre> - * 0x80000000 : 31 - * 0x7fffffff : 0 - * 0x00000001 : 0 - * 0x00000000 : 32 - * </pre> - * - * @param value The integer value for which the least significant bit index is to be found. - * @return The index of the least significant bit in state "true". Returns 32 if no bit is in state "true". - */ - static public int leastSignificantBit(int value) { - int i=-1; - while (++i < 32 && (((1<<i) & value)) == 0); - return i; - } - /** - * Constructs a low level bitvector that holds size elements, with each element taking bitsPerElement bits. - * CD. THIS METHOD ESSENTIALLY ROUNDS TO THE NEXT MULTIPLE OF 64 BITS. - * This function gets the smallest number of longs that stores the requisite bits. - * @param size the number of elements to be stored in the bitvector (must be ≥ 0). - * @param bitsPerElement the number of bits one single element takes. - * @return a low level bitvector. - */ - public static long[] makeBitVector(long size, int bitsPerElement) { - long nBits = size*bitsPerElement; - //System.out.println("IN BITVECTOR"); - //System.out.println("Using " + nBits + " bits"); - long right_shift = ((nBits-1) >> ADDRESS_BITS_PER_UNIT) ; // This line basically does (nBits-1) / 2^ADDRESS... - long safe_right_shift = ((nBits-1) >>> ADDRESS_BITS_PER_UNIT) ; // This line basically does (nBits-1) / 2^ADDRESS... - // System.out.println("Right shift " + right_shift); - //System.out.println("Safe Right shift " + safe_right_shift); - int unitIndex = (int)((nBits-1) >> ADDRESS_BITS_PER_UNIT); // How many multiples of 64 bits do we need to store nBits bits? - //System.out.println(ADDRESS_BITS_PER_UNIT); - long[] bitVector = new long[unitIndex + 1]; - //System.out.println("length " + bitVector.length); - //System.out.println("Total bits: " + (bitVector.length * 64)); - //System.out.println("Num slots available: " + (bitVector.length * 64) / bitsPerElement); - return bitVector; - } - - /** - * Returns the index of the most significant bit in state "true". - * Returns -1 if no bit is in state "true". - * - * Examples: - * <pre> - * 0x80000000 : 31 - * 0x7fffffff : 30 - * 0x00000001 : 0 - * 0x00000000 : -1 - * </pre> - * - * @param value The integer value for which the most significant bit index is to be found. - * @return The index of the most significant bit in state "true". Returns -1 if no bit is in state "true". - */ - static public int mostSignificantBit(int value) { - int i=32; - while (--i >=0 && (((1<<i) & value)) == 0); - return i; - } - - /** - * Returns the index within the unit that contains the given bitIndex. - * - * @param bitIndex The index of the bit to be checked. - * @return The index within the unit that contains the given bitIndex. - */ - protected static long offset(long bitIndex) { - return bitIndex & BIT_INDEX_MASK; // equivalent to bitIndex%64 - } - /** - * Initializes a table with numbers having 1,2,3,...,64 bits set. - * pows[i] has bits [0..i-1] set. - * pows[64] == -1L == ~0L == has all 64 bits set : correct. - * to speedup calculations in subsequent methods. - * Output: -1, 2^63-1, 2^62-1, ..., 2^1-1. - */ - - private static long[] precomputePows() { - long[] pows=new long[BITS_PER_UNIT+1]; - long value = ~0L; - - // decrement i before executing the loop and only enter loop if the decremented value is at least 1. - // this means that the loop starts at i = 64 and iterates down to i = 1. - for (int i=BITS_PER_UNIT+1; --i >= 1; ) { - pows[i]=value >>> (BITS_PER_UNIT-i); - } - pows[0]=0L; - return pows; - } - - /** - * Sets the bit with index bitIndex in the bitvector bits to the state specified by value. - * - * @param bits the bitvector. - * @param bitIndex the index of the bit to be changed. - * @param value the value to be stored in the bit. - */ - public static void put(long[] bits, long bitIndex, boolean value) { - if (value) - set(bits, bitIndex); - else - clear(bits, bitIndex); - } - - /** - * Sets bits of a bitvector from index <code>from</code> to index <code>to</code> to the bits of <code>value</code>. - * Bit <code>from</code> is set to bit 0 of <code>value</code>, ..., bit <code>to</code> is set to bit <code>to-from</code> of <code>value</code>. - * All other bits stay unaffected. - * If from > to then does nothing. - * Precondition (not checked): to-from+1 ≤ 64. - * - * this function is equivalent to the slower code below: - * int fromIndex=from/BITS_PER_UNIT; - * int toIndex=to/BITS_PER_UNIT; - * int fromOffset=from%BITS_PER_UNIT; - * int toOffset=to%BITS_PER_UNIT; - * - * @param bits the bitvector. - * @param value the value to be copied into the bitvector. - * @param from index of start bit (inclusive). - * @param to index of end bit (inclusive). - */ - public static void putLongFromTo(long[] bits, long value, long from, long to) { - if (from>to) return; - - final int fromIndex=(int)(from >> ADDRESS_BITS_PER_UNIT); //equivalent to from/64 - final int toIndex=(int)(to >> ADDRESS_BITS_PER_UNIT); - final int fromOffset=(int)(from & BIT_INDEX_MASK); //equivalent to from % 64 - final int toOffset=(int)(to & BIT_INDEX_MASK); - - //make sure all unused bits to the left are cleared. - long mask; - mask=bitMaskWithBitsSetFromTo(to-from+1, BIT_INDEX_MASK); - long cleanValue=value & (~mask); - - long shiftedValue; - - if (fromIndex==toIndex) { //range does not cross unit boundaries; should go into one single long value. - shiftedValue=cleanValue << fromOffset; - mask=bitMaskWithBitsSetFromTo(fromOffset, toOffset); - bits[fromIndex] = (bits[fromIndex] & (~mask)) | shiftedValue; - return; - - } - - //range crosses unit boundaries; value should go into two long values. - //copy into first long value. - shiftedValue=cleanValue << fromOffset; - mask=bitMaskWithBitsSetFromTo(fromOffset, BIT_INDEX_MASK); - bits[fromIndex] = (bits[fromIndex] & (~mask)) | shiftedValue; - - //copy into second long value. - shiftedValue=cleanValue >>> (BITS_PER_UNIT - fromOffset); - mask=bitMaskWithBitsSetFromTo(0, toOffset); - bits[toIndex] = (bits[toIndex] & (~mask)) | shiftedValue; - } - - /** - * Changes the bit with index bitIndex in the bitvector bits to the "set" (true) state. - * - * @param bits the bitvector. - * @param bitIndex the index of the bit to be set. - */ - public static void set(long[] bits, long bitIndex) { - bits[(int)(bitIndex >> ADDRESS_BITS_PER_UNIT)] |= 1L << (bitIndex & BIT_INDEX_MASK); - } - - /** - * Returns the index of the unit that contains the given bitIndex. - * - * @param bitIndex The index of the bit to be checked. - * @return The index of the unit that contains the given bitIndex. - */ - protected static long unit(long bitIndex) { - return bitIndex >> ADDRESS_BITS_PER_UNIT; // equivalent to bitIndex/64 - } -} diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVectorWrapper.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVectorWrapper.java deleted file mode 100644 index a4c24a3f..00000000 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuickBitVectorWrapper.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.quotientfilter; - -public class QuickBitVectorWrapper extends Bitmap { - - long[] bs; - - public QuickBitVectorWrapper(int bits_per_entry, long num_entries) { - bs = QuickBitVector.makeBitVector(num_entries, bits_per_entry); - } - - @Override - public long size() { - return (long)bs.length * Long.BYTES * 8L; - } - - @Override - public void set(long bit_index, boolean value) { - if (value) { - QuickBitVector.set(bs, bit_index); - } - else { - QuickBitVector.clear(bs, bit_index); - } - } - - @Override - public void setFromTo(long from, long to, long value) { - QuickBitVector.putLongFromTo(bs, value, from, to - 1); - } - - @Override - public boolean get(long bit_index) { - return QuickBitVector.get(bs, bit_index); - } - - @Override - public long getFromTo(long from, long to) { - return QuickBitVector.getLongFromTo(bs, from, to - 1); - } - - -} - diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java index 93a6761c..8671dd18 100644 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java +++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilter.java @@ -23,13 +23,16 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.Set; +import org.apache.datasketches.filters.common.BitArray; +import org.apache.datasketches.filters.common.HeapBitArray; + public class QuotientFilter extends Filter { int bitPerEntry; int fingerprintLength; int power_of_two_size; int num_entries; - Bitmap filter; + BitArray filter; double expansion_threshold; long max_entries_before_expansion; @@ -83,18 +86,19 @@ public class QuotientFilter extends Filter { expand_autonomously = val; } - Bitmap make_filter(long init_size, int bits_per_entry) { + BitArray make_filter(long init_size, int bits_per_entry) { // System.out.println(init_size ) ; // System.out.println(num_extension_slots); // System.out.println("Making BitVector with: " + (init_size + num_extension_slots) + "SLOTS"); - return new QuickBitVectorWrapper(bits_per_entry, init_size); + //return new QuickBitVectorWrapper(bits_per_entry, init_size); + return new HeapBitArray(init_size * bits_per_entry); } public int get_fingerprint_length() { return fingerprintLength; } - QuotientFilter(int power_of_two, int bits_per_entry, Bitmap bitmap) { + QuotientFilter(int power_of_two, int bits_per_entry, BitArray bitmap) { power_of_two_size = power_of_two; bitPerEntry = bits_per_entry; fingerprintLength = bits_per_entry - 3; @@ -152,7 +156,7 @@ public class QuotientFilter extends Filter { long getMask() { return get_num_slots() - 1; } - + // sets the metadata flag bits for a given slot index void modify_slot(boolean is_occupied, boolean is_continuation, boolean is_shifted, long index) { @@ -163,7 +167,7 @@ public class QuotientFilter extends Filter { // sets the fingerprint for a given slot index void set_fingerprint(long index, long fingerprint) { - filter.setFromTo(index * bitPerEntry + 3, (long)index * bitPerEntry + 3 + fingerprintLength, fingerprint); + filter.setBits(index * bitPerEntry + 3, fingerprintLength, fingerprint); } // print a nice representation of the filter that can be understood. @@ -185,7 +189,7 @@ public class QuotientFilter extends Filter { if (remainder == 3) { sbr.append(" "); } - sbr.append(filter.get(i) ? "1" : "0"); + sbr.append(filter.getBit(i) ? "1" : "0"); } sbr.append("\n"); return sbr.toString(); @@ -198,12 +202,12 @@ public class QuotientFilter extends Filter { // return a fingerprint in a given slot index long get_fingerprint(long index) { - return filter.getFromTo(index * bitPerEntry + 3, index * bitPerEntry + 3 + fingerprintLength); + return filter.getBits(index * bitPerEntry + 3, fingerprintLength); } // return an entire slot representation, including metadata flags and fingerprint long get_slot(long index) { - return filter.getFromTo(index * bitPerEntry, (index + 1) * bitPerEntry); + return filter.getBits(index * bitPerEntry, bitPerEntry); } // compare a fingerprint input to the fingerprint in some slot index @@ -251,27 +255,27 @@ public class QuotientFilter extends Filter { } boolean is_occupied(long index) { - return filter.get(index * bitPerEntry); + return filter.getBit(index * bitPerEntry); } boolean is_continuation(long index) { - return filter.get(index * bitPerEntry + 1); + return filter.getBit(index * bitPerEntry + 1); } boolean is_shifted(long index) { - return filter.get(index * bitPerEntry + 2); + return filter.getBit(index * bitPerEntry + 2); } void set_occupied(long index, boolean val) { - filter.set(index * bitPerEntry, val); + filter.assignBit(index * bitPerEntry, val); } void set_continuation(long index, boolean val) { - filter.set(index * bitPerEntry + 1, val); + filter.assignBit(index * bitPerEntry + 1, val); } void set_shifted(long index, boolean val) { - filter.set(index * bitPerEntry + 2, val); + filter.assignBit(index * bitPerEntry + 2, val); } boolean is_slot_empty(long index) { @@ -689,7 +693,7 @@ public class QuotientFilter extends Filter { } public boolean get_bit_at_offset(int offset) { - return filter.get(offset); + return filter.getBit(offset); } public void compute_statistics() { diff --git a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java index 1f98c82f..a3971219 100644 --- a/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java +++ b/src/main/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterBuilder.java @@ -18,7 +18,7 @@ */ package org.apache.datasketches.filters.quotientfilter; -import java.util.concurrent.ThreadLocalRandom; +//import java.util.concurrent.ThreadLocalRandom; import org.apache.datasketches.common.SketchesArgumentException; diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java b/src/test/java/org/apache/datasketches/filters/common/DirectBitArrayRTest.java similarity index 85% rename from src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java rename to src/test/java/org/apache/datasketches/filters/common/DirectBitArrayRTest.java index 521019e6..ea02ad21 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayRTest.java +++ b/src/test/java/org/apache/datasketches/filters/common/DirectBitArrayRTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -99,6 +99,27 @@ public class DirectBitArrayRTest { assertTrue(dba.isReadOnly()); } + @Test + public void getBitsFromToTest() { + final HeapBitArray hba = new HeapBitArray(128); + hba.setBit(1); // will override, but this forces non-empty + hba.setLong(0, 0x5555555555555555L); + hba.setLong(1, 0xFFFFFFFFFC003FFFL); + final Memory mem = bitArrayToMemory(hba); + DirectBitArrayR dba = DirectBitArrayR.wrap(mem, hba.isEmpty()); + + // single, full long test + assertEquals(dba.getBits(0, 64), 0x5555555555555555L); + + // subset of single long, mostly ones with a stretch of zeros + assertEquals(dba.getBits(64, 64), 0xFFFFFFFFFC003FFFL); + assertEquals(dba.getBits(78, 12), 0); + assertEquals(dba.getBits(77, 14), 8193); + + // spanning longs + assertEquals(dba.getBits(60, 20), 0x3FFF5); + } + @Test public void countBitsWhenDirty() { // like basicOperationTest but with setBit which does @@ -159,6 +180,9 @@ public class DirectBitArrayRTest { // all of these try to modify a read-only memory assertThrows(SketchesReadOnlyException.class, () -> dba.setBit(14)); + assertThrows(SketchesReadOnlyException.class, () -> dba.clearBit(7)); + assertThrows(SketchesReadOnlyException.class, () -> dba.assignBit(924, false)); + assertThrows(SketchesReadOnlyException.class, () -> dba.setBits(100, 30, 0xFF)); assertThrows(SketchesReadOnlyException.class, () -> dba.getAndSetBit(100)); assertThrows(SketchesReadOnlyException.class, () -> dba.reset()); assertThrows(SketchesReadOnlyException.class, () -> dba.invert()); diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java b/src/test/java/org/apache/datasketches/filters/common/DirectBitArrayTest.java similarity index 78% rename from src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java rename to src/test/java/org/apache/datasketches/filters/common/DirectBitArrayTest.java index a45bcbb8..4cc229c5 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/DirectBitArrayTest.java +++ b/src/test/java/org/apache/datasketches/filters/common/DirectBitArrayTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -68,6 +68,8 @@ public class DirectBitArrayTest { } // no text of max size because the BitArray allows up to Integer.MAX_VALUE + // bits, which is the maximum size of an array in Java -- can't use it all, + // (need 2 longs for preamble) but also can't allocate that large to test on most machines @Test public void initializeTooSmallTest() { @@ -134,6 +136,70 @@ public class DirectBitArrayTest { dba.setBit(100); assertTrue(dba.getAndSetBit(100)); assertEquals(dba.getNumBitsSet(), 8); + + dba.reset(); + assertTrue(dba.isEmpty()); + assertEquals(dba.getNumBitsSet(), 0); + + dba.setBit(0); + dba.setLong(0, -1); + assertTrue(dba.getBit(60)); + dba.clearBit(60); + assertFalse(dba.getBit(60)); + + assertTrue(dba.getBit(35)); + dba.assignBit(35, false); + assertFalse(dba.getBit(35)); + dba.assignBit(35, true); + assertTrue(dba.getBit(35)); + } + + @Test + public void getBitsFromToTest() { + final int numBits = 128; + final WritableMemory wmem = WritableMemory.writableWrap(new byte[32]); + final DirectBitArray dba = DirectBitArray.initialize(numBits, wmem); + + // single, full long test + dba.setBit(0); // useless but forces non-empty when using setLong() + dba.setLong(0, 0x5555555555555555L); + assertEquals(dba.getBits(0, 64), 0x5555555555555555L); + assertEquals(dba.getBits(64, 64), 0); + + // subset of single long, mostly ones with a stretch of zeros + dba.setLong(1, 0xFFFFFFFFFC003FFFL); + assertEquals(dba.getBits(64, 64), 0xFFFFFFFFFC003FFFL); + assertEquals(dba.getBits(78, 12), 0); + assertEquals(dba.getBits(77, 14), 8193); + + // spanning longs + assertEquals(dba.getBits(60, 20), 0x3FFF5); + } + + @Test + public void setBitsFromToTest() { + final int numBits = 128; + WritableMemory wmem = WritableMemory.writableWrap(new byte[32]); + DirectBitArray ba = DirectBitArray.initialize(numBits, wmem); + + // within a single long + ba.setBits(0, 64, 0x80000000DAB8C730L); + assertEquals(ba.getLong(0), 0x80000000DAB8C730L); + assertEquals(ba.getLong(1), 0); + + ba.setBits(40, 8, 0xA6); + assertEquals(ba.getLong(0), 0x8000A600DAB8C730L); + + // spanning longs + ba.setBits(60, 20, 0x3FFF5); + assertEquals(ba.getLong(0), 0x5000A600DAB8C730L); + assertEquals(ba.getLong(1), 0x3FFFL); + + // found specific failure with this test + wmem = WritableMemory.writableWrap(new byte[1272]); + ba = DirectBitArray.initialize(10000, wmem); + ba.setBits(601 * 10 + 3, 7, 125); + assertEquals(ba.getBits(601 * 10 + 3, 7), 125); } @Test diff --git a/src/test/java/org/apache/datasketches/filters/bloomfilter/HeapBitArrayTest.java b/src/test/java/org/apache/datasketches/filters/common/HeapBitArrayTest.java similarity index 79% rename from src/test/java/org/apache/datasketches/filters/bloomfilter/HeapBitArrayTest.java rename to src/test/java/org/apache/datasketches/filters/common/HeapBitArrayTest.java index 0e91788e..a55f98a3 100644 --- a/src/test/java/org/apache/datasketches/filters/bloomfilter/HeapBitArrayTest.java +++ b/src/test/java/org/apache/datasketches/filters/common/HeapBitArrayTest.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.datasketches.filters.bloomfilter; +package org.apache.datasketches.filters.common; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; @@ -75,9 +75,62 @@ public class HeapBitArrayTest { assertTrue(ba.isEmpty()); assertEquals(ba.getNumBitsSet(), 0); + ba.setLong(0, -1); + assertTrue(ba.getBit(60)); + ba.clearBit(60); + assertFalse(ba.getBit(60)); + + assertTrue(ba.getBit(35)); + ba.assignBit(35, false); + assertFalse(ba.getBit(35)); + ba.assignBit(35, true); + assertTrue(ba.getBit(35)); + assertTrue(String.valueOf(ba).length() > 0); } + @Test + public void getBitsFromToTest() { + final HeapBitArray ba = new HeapBitArray(128); + + // single, full long test + ba.setLong(0, 0x5555555555555555L); + assertEquals(ba.getBits(0, 64), 0x5555555555555555L); + assertEquals(ba.getBits(64, 64), 0); + + // subset of single long, mostly ones with a stretch of zeros + ba.setLong(1, 0xFFFFFFFFFC003FFFL); + assertEquals(ba.getBits(64, 64), 0xFFFFFFFFFC003FFFL); + assertEquals(ba.getBits(78, 12), 0); + assertEquals(ba.getBits(77, 14), 8193); + + // spanning longs + assertEquals(ba.getBits(60, 20), 0x3FFF5); + } + + @Test + public void setBitsFromToTest() { + HeapBitArray ba = new HeapBitArray(128); + + // within a single long + ba.setBits(0, 64, 0x80000000DAB8C730L); + assertEquals(ba.getLong(0), 0x80000000DAB8C730L); + assertEquals(ba.getLong(1), 0); + + ba.setBits(40, 8, 0xA6); + assertEquals(ba.getLong(0), 0x8000A600DAB8C730L); + + // spanning longs + ba.setBits(60, 20, 0x3FFF5); + assertEquals(ba.getLong(0), 0x5000A600DAB8C730L); + assertEquals(ba.getLong(1), 0x3FFFL); + + // found specific failure with this test + ba = new HeapBitArray(10000); + ba.setBits(601 * 10 + 3, 7, 125); + assertEquals(ba.getBits(601 * 10 + 3, 7), 125); + } + @Test public void bitAddresOutOfBoundsTest() { final HeapBitArray ba = new HeapBitArray(1024); diff --git a/src/test/java/org/apache/datasketches/filters/quotientfilter/BitVectorTests.java b/src/test/java/org/apache/datasketches/filters/quotientfilter/BitVectorTests.java deleted file mode 100644 index 487e3657..00000000 --- a/src/test/java/org/apache/datasketches/filters/quotientfilter/BitVectorTests.java +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.datasketches.filters.quotientfilter; - -import org.testng.annotations.Test; -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.assertTrue; -import static org.testng.Assert.assertFalse; - -public class BitVectorTests { - - /** - * This test method initializes a QuickBitVectorWrapper with various combinations of bits per entry and number of entries. - * It then calculates the expected length of the bit vector and asserts that the actual size of the bit vector matches the expected length. - * - * Example Input-Output Pairs: - * 1. Input: bitsPerEntry = 2, numEntries = 8 (1L << 3) - * Output: expectedLengthBits = 64 - * - * 2. Input: bitsPerEntry = 3, numEntries = 16 (1L << 4) - * Output: 64 - * - * 3. Input: bitsPerEntry = 33, numEntries = 8 (1L << 3) - * Output: expectedLengthBits = 320 - */ - @Test - static public void testSize(){ - int[] bitsPerEntry = {2, 3, 4, 5, 6, 7, 8, 9, 10, 23, 24, 25, 31, 32, 33}; - long[] numEntries = {1L << 3, 1L<<4, 1L<<8, 1L << 16}; - long nBits ; - long expectedLengthBits ; - - for (int i = 0; i < bitsPerEntry.length; i++){ - for (int j = 0; j < numEntries.length; j++) { - QuickBitVectorWrapper bv = new QuickBitVectorWrapper(bitsPerEntry[i], numEntries[j]); - nBits = bitsPerEntry[i] * numEntries[j]; - expectedLengthBits = 64 * ((nBits % 64 == 0) ? (nBits / 64) : (1 + nBits / 64)); - assertEquals(bv.size(), expectedLengthBits); - } - } - } - - /* - This test amends a few entries in the BitVector and checks that they are appropriately set. - */ - @Test - static public void testSettersAndGetters(){ - QuickBitVectorWrapper bv = new QuickBitVectorWrapper(6, 16); - - // All entries should be False before any updates - for (int i = 0; i < bv.size(); i++){ - assertFalse(bv.get(i), "All entries should be False"); - } - - // Set some values - bv.set(0, true); - assertTrue(bv.get(0), "Value at index 0 should be True"); - - bv.set(32, true) ; - assertTrue(bv.get(32), "Value at index 32 should be True"); - - bv.setFromTo(64, 128, ~0L); - assertTrue(bv.getFromTo(64, 128) == -1L, "Values from 64 to 128 should be set to 1") ; - } -} diff --git a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java index c30d7317..88491475 100644 --- a/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java +++ b/src/test/java/org/apache/datasketches/filters/quotientfilter/QuotientFilterTest.java @@ -22,12 +22,20 @@ import org.testng.annotations.Test; import static org.testng.Assert.assertTrue; import static org.testng.Assert.assertEquals; +import java.util.ArrayList; import java.util.BitSet; import java.util.HashSet; import java.util.Random; public class QuotientFilterTest { + // this method had been in Bitmap, but was used only to test the QuotientFilter + public static boolean get_fingerprint_bit(long index, long fingerprint) { + long mask = 1 << index; + long and = fingerprint & mask; + return and != 0; + } + /* * This test is based on the example from https://en.wikipedia.org/wiki/Quotient_filter * in "Algorithm Description" section. @@ -36,7 +44,7 @@ public class QuotientFilterTest { * (b,1), (e,4), (f, 7), (c,1), (d,2), (a,1) */ @Test - static public void WikiInsertionTest() { + public void WikiInsertionTest() { int bits_per_entry = 6; // 6 bits per entry => 3 bits fingerprint, resolved internally in the filter. int num_entries_power = 3; QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); @@ -74,7 +82,7 @@ public class QuotientFilterTest { assertEquals(qf.get_fingerprint(7), F); } - static public int getState(QuotientFilter filter, int slot) { + public int getState(QuotientFilter filter, int slot) { return (filter.is_occupied(slot) ? 1 : 0) << 2 | (filter.is_continuation(slot) ? 1 : 0) << 1 | (filter.is_shifted(slot) ? 1 : 0); @@ -85,7 +93,7 @@ public class QuotientFilterTest { * It performs the same insertions as in Figure 2 and checks for the same result. */ @Test - static public void PaperInsertionTest() { + public void PaperInsertionTest() { int bits_per_entry = 8; int num_entries_power = 4; int num_entries = (int)Math.pow(2, num_entries_power); @@ -116,7 +124,7 @@ public class QuotientFilterTest { // test we don't get any false negatives for quotient filter @Test - static public void FalseNegativeTest() { + public void FalseNegativeTest() { int bits_per_entry = 10; int num_entries_power = 10; QuotientFilter filter = new QuotientFilter(num_entries_power, bits_per_entry); @@ -130,7 +138,7 @@ public class QuotientFilterTest { * Checks this can be handled by the internal data structure and then deletes one of the keys from the filter. */ @Test - static public void OverflowTest() { + public void OverflowTest() { int bits_per_entry = 8; int num_entries_power = 3; int num_entries = (int)Math.pow(2, num_entries_power); @@ -158,7 +166,7 @@ public class QuotientFilterTest { * and the program exits, indicating a test failure. */ @Test - static public void testQuotientFilterInsertionAndIteration() { + public void testQuotientFilterInsertionAndIteration() { int bits_per_entry = 8; int num_entries_power = 4; @@ -166,22 +174,24 @@ public class QuotientFilterTest { //int fingerprint_size = bits_per_entry - 3; QuotientFilter qf = new QuotientFilter(num_entries_power, bits_per_entry); - qf.insert(0, 2, false); - qf.insert(0, 3, false); - qf.insert(0, 3, false); - qf.insert(0, 4, false); - qf.insert(0, 15, false); // last slot in the filter - qf.insert(0, 16, false); // outside the bounds + qf.insert(0x1F, 0, false); + qf.insert(0x1F, 2, false); + qf.insert(0x1F, 3, false); + qf.insert(0x1F, 3, false); + qf.insert(0x1F, 4, false); + qf.insert(0x1F, 15, false); // last slot in the filter + qf.insert(0x1F, 16, false); // outside the bounds qf.pretty_print() ; Iterator it = new Iterator(qf); - int[] arr = new int[] {2, 3, 3, 4, 15}; + //int[] arr = new int[] {2, 3, 3, 4, 15}; + int[] arr = new int[] {0, 2, 3, 3, 4, 15}; int arr_index = 0; while (it.next()) {assertEquals(it.bucket_index, arr[arr_index++]);} } @Test - static public void testQuotientFilterIterator() { + public void testQuotientFilterIterator() { int bits_per_entry = 8; int num_entries_power = 4; @@ -225,7 +235,7 @@ public class QuotientFilterTest { result.set(index++, is_continuation); result.set(index++, is_shifted); for (int i = 0; i < bits_per_entry - 3; i++) { - result.set(index++, Bitmap.get_fingerprint_bit(i, fingerprint) ); + result.set(index++, get_fingerprint_bit(i, fingerprint) ); } return result; } @@ -256,7 +266,8 @@ public class QuotientFilterTest { Helper function to test that no false negatives are returned. */ static public boolean test_no_false_negatives(QuotientFilter filter, int num_entries) { - HashSet<Integer> added = new HashSet<Integer>(); + //HashSet<Integer> added = new HashSet<Integer>(); + ArrayList<Integer> added = new ArrayList<Integer>(); int seed = 5; Random rand = new Random(seed); @@ -274,7 +285,7 @@ public class QuotientFilterTest { for (Integer i : added) { boolean found = filter.search((long)i); if (!found) { - return false ; + return false; } } return true; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
