Author: schor
Date: Thu Oct 23 17:22:12 2014
New Revision: 1633890
URL: http://svn.apache.org/r1633890
Log:
[UIMA-4060] improve int set impls, fix one bug in PositiveIntSet, add test cases
Added:
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
Modified:
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/PositiveIntSetTest.java
Modified:
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java?rev=1633890&r1=1633889&r2=1633890&view=diff
==============================================================================
---
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java
(original)
+++
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java
Thu Oct 23 17:22:12 2014
@@ -31,25 +31,73 @@ import java.util.NoSuchElementException;
*
* This impl is for use in a single thread case only
*
+ * This impl supports offset, to let this bit set store items in a range n ->
n + small number
+ *
+ * If using offset, you must add ints in a range equal to or above the offset
+ *
*/
public class IntBitSet {
private final BitSet set;
- public IntBitSet() {
- this(16);
+ private int size = 0; // tracked here to avoid slow counting of cardinality
+
+ private int offset = 0;
+
+ /**
+ * Sets the lowest int value that can be kept; trying to add a lower one
throws an error
+ * <ul>
+ * <li>Allows for a more compact representation, potentially</li>
+ * <li>only can be set if the size is 0</li>
+ * </ul>
+ * @param offset - the lowest value that can be kept in the set. 0 is
allowed.
+ */
+ public void setOffset(int offset) {
+ if (size != 0) {
+ throw new IllegalStateException("Cannot set offset unless set is empty");
+ }
+ if (offset < 0) {
+ throw new IllegalArgumentException("Offset must be 0 or a positive int,
was " + offset);
+ }
+ this.offset = offset;
+ }
+ /**
+ * @return the current offset
+ */
+ public int getOffset() {
+ return offset;
}
- public IntBitSet(int initialCapacity) {
- set = new BitSet(initialCapacity);
+ /**
+ * Construct an IntBitSet capable of holding ints from 0 to 63, (perhaps
plus an offset)
+ */
+ public IntBitSet() {
+ this(63); // in current java impls, this is the minimum 1 long int is
allocated
+ }
+
+ /**
+ * Construct an IntBitSet capable of holding ints from 0 to maxInt (perhaps
plus an offset)
+ * @param maxInt the biggest int (perhaps plus an offset) that can be held
without growing the space
+ */
+ public IntBitSet(int maxInt) {
+ set = new BitSet(Math.max(1, maxInt));
}
+ /**
+ * empty the IntBitSet
+ */
public void clear() {
- set.clear();
+ set.clear();
+ size = 0;
}
+ /**
+ *
+ * @param key
+ * @return
+ */
public boolean contains(int key) {
- return (key == 0) ? false : set.get(key);
+ return (key == 0) ? false : set.get(key - offset);
}
/**
@@ -57,10 +105,19 @@ public class IntBitSet {
* @param key
* @return true if this set did not already contain the specified element
*/
- public boolean add(int key) {
+ public boolean add(int original_key) {
+ if (original_key < offset) {
+ throw new IllegalArgumentException("key must be positive, but was " +
original_key);
+ }
+
+ final int key = original_key - offset;
final boolean prev = set.get(key);
- set.set(key);;
- return prev == false;
+ set.set(key);
+ if (!prev) {
+ size ++;
+ return true;
+ }
+ return false;
}
/**
@@ -68,30 +125,44 @@ public class IntBitSet {
* @param key -
* @return true if this key was removed, false if not present
*/
- public boolean remove(int key) {
+ public boolean remove(int original_key) {
+ final int key = original_key - offset;
final boolean prev = set.get(key);
set.clear(key);
- return prev;
+ if (prev) {
+ size --;
+ return true;
+ }
+ return false;
}
+ /**
+ *
+ * @return the number of elements in this set
+ */
public int size() {
- return set.cardinality();
+ return size; // bit set cardinality() is slow
+ }
+
+ public int getSpaceUsed_in_bits() {
+ return set.size();
}
/**
*
* @return space used in 32 bit words
*/
- public int getSpaceUsed() {
- return set.length() >> 5; // divide by 32
+ public int getSpaceUsed_in_words() {
+ return getSpaceUsed_in_bits() >> 5; // divide by 32
}
/**
*
- * @return largest int stored plus 1, or 0 if no ints in table
+ * @return largest int in the set
+ * If the set has no members, 0 is returned
*/
- public int getLargestIntP1() {
- return set.length();
+ public int getLargestMenber() {
+ return set.length() - 1 + offset;
}
private class IntBitSetIterator implements IntListIterator {
@@ -103,7 +174,7 @@ public class IntBitSet {
}
public final boolean hasNext() {
- return (curKey > 0 && curKey < set.length());
+ return (curKey >= 0);
}
public final int next() {
@@ -112,7 +183,7 @@ public class IntBitSet {
}
final int r = curKey;
curKey = set.nextSetBit(curKey + 1);
- return r;
+ return r + offset;
}
/**
Modified:
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java?rev=1633890&r1=1633889&r2=1633890&view=diff
==============================================================================
---
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
(original)
+++
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
Thu Oct 23 17:22:12 2014
@@ -25,7 +25,8 @@ import java.util.NoSuchElementException;
import org.apache.uima.jcas.impl.JCasHashMap;
/**
- * A set of non-zero ints.
+ * A set of non-zero ints.
+ * 0 reserved to indicate "not in the map"
*
* based on Int2IntHashMap
* This impl is for use in a single thread case only
@@ -62,7 +63,8 @@ public class IntHashSet {
private boolean secondTimeShrinkable = false;
- private int mostPositive = 0;
+ private int mostPositive = Integer.MIN_VALUE;
+ private int mostNegative = Integer.MAX_VALUE;
public IntHashSet() {
this(16);
@@ -92,8 +94,10 @@ public class IntHashSet {
size++;
}
+ public int getSpaceUsedInWords() {
+ return keys.length + 8; // 8 is overhead for this class excluding any
Java object overhead
+ }
-
private void increaseTableCapacity() {
final int [] oldKeys = keys;
final int oldCapacity = oldKeys.length;
@@ -150,6 +154,8 @@ public class IntHashSet {
size = 0;
Arrays.fill(keys, 0);
resetHistogram();
+ mostPositive = Integer.MIN_VALUE;
+ mostNegative = Integer.MAX_VALUE;
}
/**
@@ -205,8 +211,18 @@ public class IntHashSet {
* @return true if this set did not already contain the specified element
*/
public boolean add(int key) {
- if (key > mostPositive) {
- mostPositive = key;
+ if (key == 0) {
+ throw new IllegalArgumentException("0 not allowed as a key to add");
+ }
+ if (size == 0) {
+ mostPositive = mostNegative = key;
+ } else {
+ if (key > mostPositive) {
+ mostPositive = key;
+ }
+ if (key < mostNegative) {
+ mostNegative = key;
+ }
}
final int i = find(key);
if (keys[i] == 0) {
@@ -228,7 +244,10 @@ public class IntHashSet {
}
/**
- *
+ * mostPositive and mostNegative are not updated
+ * for removes. So these values may be inaccurate,
+ * but mostPositive is always >= actual most positive,
+ * and mostNegative is always <= actual most negative.
* @param key -
* @return true if the key was present
*/
@@ -246,9 +265,23 @@ public class IntHashSet {
return size;
}
+ /**
+ *
+ * @return a value that is >= the actual most positive value in the table.
+ * it will be == unless a remove operation has removed a most positive
value
+ */
public int getMostPositive() {
return mostPositive;
}
+
+ /**
+ *
+ * @return a value that is <= the actual least positive value in the table.
+ * It will be == unless remove operations has removed a least positive
value.
+ */
+ public int getMostNegative() {
+ return mostNegative;
+ }
public void showHistogram() {
if (TUNE) {
Modified:
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java?rev=1633890&r1=1633889&r2=1633890&view=diff
==============================================================================
---
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java
(original)
+++
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java
Thu Oct 23 17:22:12 2014
@@ -27,20 +27,31 @@ import java.util.NoSuchElementException;
/**
* An set of non-zero ints.
*
- * Uses two different representations depending on the ratio of the max int
stored and the total number of ints stored.
- *
+ * Uses 2 1/2 different representations depending on the ratio of the max
int stored and the total number of ints stored.
+ * For large sparse ints, it uses the IntHashSet
+ *
+ * For more dense ints, it uses bitSets or bitSets with an offset
+ *
+ * It switches when adding new members if the other representation is
sufficiently smaller (using a Hysteresis of 16 words)
*/
public class PositiveIntSet {
// number of words a inthashset has to be better than intbitset to cause
switch
- private static final int HYSTERESIS = 4;
+ private static final int HYSTERESIS = 16;
- IntBitSet intBitSet;
+ private IntBitSet intBitSet;
- IntHashSet intHashSet;
+ private IntHashSet intHashSet;
+ //package private for test case
boolean isBitSet;
+ /**
+ * Set false once we find we have to reduce the bit offset
+ */
+ //package private for test case
+ boolean useOffset = true;
+
public PositiveIntSet() {
intBitSet = new IntBitSet();
isBitSet = true;
@@ -62,7 +73,7 @@ public class PositiveIntSet {
IntListIterator it = intHashSet.getIterator();
int i = 0;
while (it.hasNext()) {
- allValues[i] = it.next();
+ allValues[i++] = it.next();
}
Arrays.sort(allValues);
return new IntListIteratorOverArray(allValues);
@@ -132,7 +143,7 @@ public class PositiveIntSet {
/**
*
- * @param key
+ * @param key -
* @return true if this set did not already contain the specified element
*/
public boolean add(int key) {
@@ -143,6 +154,33 @@ public class PositiveIntSet {
return intHashSet.add(key);
}
}
+
+ /**
+ * When a key is added which is lower than the offset, we need to adjust the
whole int set table.
+ * Although an array copy could do this, we don't have access to the bitset
impl.
+ *
+ * So we just set the offset to 0, and either convert the whole thing to a
inthashset or copy all the bits to a new
+ * bit set.
+ *
+ * @param key
+ */
+ private void adjustBitSetForLowerOffset(int key) {
+ final int largestInt = intBitSet.getLargestMenber();
+ final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(largestInt); //
doesn't use offset
+ final int hashSetSpaceNeeded = getHashSetSpace(intBitSet.size());
+
+ if (hashSetSpaceNeeded < (bitSetSpaceNeeded - HYSTERESIS)) {
+ switchToHashSet(size());
+ } else {
+ IntBitSet bs = new IntBitSet(largestInt);
+ IntListIterator it = intBitSet.getIterator();
+ while (it.hasNext()) {
+ bs.add(it.next());
+ }
+ intBitSet = bs;
+ }
+ useOffset = false; // stop using because we have evidence it isn't
appropriate
+ }
public void add(int[] vs) {
for (int i : vs) {
@@ -155,8 +193,7 @@ public class PositiveIntSet {
return intBitSet.remove(key);
} else {
return intHashSet.remove(key);
- }
-
+ }
}
public int size() {
@@ -186,6 +223,24 @@ public class PositiveIntSet {
return a;
}
+ /**
+ * number of 32 bit words in use by bit set, given the max key (less offset)
+ * @param maxKeyLessOffset
+ * @return number of words needed to store the highest value
+ */
+ private int getBitSetSpaceFromMaxInt(int maxKeyLessOffset) {
+ // e.g., 31 key => 1 word, 32 key => 2 words
+ return 2 + (maxKeyLessOffset >> 5) + 2; // add 2 because the bits are
stored in 2 words (1 long), and 2 more for IntBitSet object overhead
+ }
+
+ private int getHashSetSpace(int numberOfEntries) {
+ long v = numberOfEntries * 3 + 8; // plus 8 is for the overhead of the
intHashSet object.
+ if (v > Integer.MAX_VALUE) {
+ throw new RuntimeException("value overflowed int, orig was " +
numberOfEntries);
+ }
+ return (int) v; // the number of 32 bit words needed is 1.5 to 3; 3 is
worst case
+ }
+
/**
* logic for switching representation
*
@@ -198,51 +253,50 @@ public class PositiveIntSet {
* switch to HashSet from BitSet if:
* HashSet space < BitSet space
* size * 12 MaxInt / 8
- * with hysteresis 1 2 4 8 16 32 64 128 256
512 1024 2048 4096 8192 16k 32k 64k 128k 256k 512k 1m 2m 4m 8m 16m
- * Don't switch unless space saving is 1 2 3 4 5 6 7 8
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
- * intHashSet
- * size space savings
- * 1 12 32768
- * 1024 12288 32768
- * 16384 196608 100000 1/2 of space
- *
- * max(32768, 1/2 of hash-map space).
+ * with hysteresis
*
* switch to BitSet from HashSet if:
* BitSet used when its space requirement is less than hashset, with
hysteresis:
- * Don't switch unless space saving is max(32768, 1/4 of hash-map
space)
- * intHashSet
- * size space savings
- * 1 12 no switch
- * 1024 12288 no switch
- * 16384 196608 100000 1/2 of space
*
*/
private void maybeSwitchRepresentation(int key) {
-
if (isBitSet) {
/********************
* Bit Set
********************/
- final int maxKeyP1 = intBitSet.getLargestIntP1(); // p1 = plus 1
- if (key < maxKeyP1) {
+
+ /*********
+ * Size is 0,
+ * use this opportunity to set the offset
+ * unless that has been a bad strategy for this
+ * instance already
+ ********/
+ if (intBitSet.size() == 0 && useOffset) {
+ // initialize bitSetOffset to the key, minus some amount to allow a
bit of previous values
+ intBitSet.setOffset(Math.max(0, key - 63));
+ // because of offset, won't have to switch to hash for this key
+ return;
+ }
+
+ final int offset = intBitSet.getOffset();
+
+ if (key < offset) {
+ adjustBitSetForLowerOffset(key);
return;
}
- // space used in words (32 bit words)
- int bitSetSpace = 1 + (Math.max((maxKeyP1 - 1), key) >> 5); // space in
words
- // keep bit set unless key would grow it beyond
-// if (bitSetSpace < 16 || key < bitSetSpace) {
-// return;
-// }
- // maybe switch to IntHashSet
- final int size = size();
+ // return if key fits in existing allocation
+ if ((key - offset) < intBitSet.getSpaceUsed_in_bits()) {
+ return;
+ }
+
// space used in words (32 bit words)
- long hashsetSpace = (size << 1) + size; // size * 3 , 1.5 - 3 words per
entry in IntHashSet
+ final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(key - offset);
+ final int hashSetSpaceNeeded = getHashSetSpace(intBitSet.size());
// switch if hashmap space plus hysteresis would be < bitmap space
- if (hashsetSpace < (bitSetSpace - HYSTERESIS)) {
- switchToHashSet(size);
+ if (hashSetSpaceNeeded < (bitSetSpaceNeeded - HYSTERESIS)) {
+ switchToHashSet(intBitSet.size());
}
return;
}
@@ -251,23 +305,21 @@ public class PositiveIntSet {
* Hash Set
********************/
// maybe switch to IntBitSet
- final int size = size();
// space used in words (32 bit words)
- long hashsetSpace = (size << 1) + size; // size * 3 , 1.5 - 3 words per
entry in IntHashSet
+ // don't bother to include the new element - it might not be "added"
+ // if it was already there, and only serves to decrease hysteresis a bit
+
+ final int hashSetSpaceNeeded = getHashSetSpace(size());
final int maxInt = intHashSet.getMostPositive();
- final int bitSetSpace = bitSetSpace(maxInt);
+ final int offset = useOffset ? intHashSet.getMostNegative() : 0;
+ final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(maxInt - offset);
- if (bitSetSpace < (hashsetSpace - HYSTERESIS)) {
- switchToBitSet(maxInt);
+ if (bitSetSpaceNeeded < (hashSetSpaceNeeded - HYSTERESIS)) {
+ switchToBitSet(maxInt, offset);
}
}
-
- private int bitSetSpace(int maxInt) {
- // 0 - 31 : 1; 32-63: 2
- return 1 + (maxInt >> 5);
- }
-
+
private void switchToHashSet(int size) {
isBitSet = false;
intHashSet = new IntHashSet(size);
@@ -278,13 +330,15 @@ public class PositiveIntSet {
intBitSet = null;
}
- private void switchToBitSet(int maxInt) {
+ private void switchToBitSet(int maxInt, int offset) {
isBitSet = true;
- intBitSet = new IntBitSet(maxInt);
+ intBitSet = new IntBitSet(maxInt - offset);
+ intBitSet.setOffset(offset);
IntListIterator it = intHashSet.getIterator();
while (it.hasNext()) {
intBitSet.add(it.next());
}
+ intHashSet = null;
}
}
Added:
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java?rev=1633890&view=auto
==============================================================================
---
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java
(added)
+++
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java
Thu Oct 23 17:22:12 2014
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import junit.framework.TestCase;
+
+
+public class IntBitSetTest extends TestCase {
+
+ IntBitSet ibs;
+ IntBitSet ibs1k;
+
+ public void setUp() {
+ ibs = new IntBitSet();
+ ibs1k = new IntBitSet();
+ ibs1k.setOffset(1000);
+ }
+
+ public void testBasic() {
+
+ ibs.add(15);
+ ibs.add(188);
+
+ IntListIterator it = ibs.getIterator();
+ assertTrue(it.hasNext());
+ assertEquals(15, it.next());
+ assertTrue(it.hasNext());
+ assertEquals(188, it.next());
+ assertFalse(it.hasNext());
+ assertEquals(3*64, ibs.getSpaceUsed_in_bits());
+ assertEquals(6, ibs.getSpaceUsed_in_words());
+
+ ibs = ibs1k;
+
+ ibs.add(1015);
+ ibs.add(1188);
+ it = ibs.getIterator();
+ assertTrue(it.hasNext());
+ assertEquals(1015, it.next());
+ assertTrue(it.hasNext());
+ assertEquals(1188, it.next());
+ assertFalse(it.hasNext());
+ assertEquals(2,ibs.size());
+
+ assertEquals(3*64, ibs.getSpaceUsed_in_bits());
+ assertEquals(6, ibs.getSpaceUsed_in_words());
+
+ ibs = new IntBitSet(64);
+ ibs.setOffset(1000);
+ ibs.add(1064);
+ assertEquals(1,ibs.size());
+ assertEquals(2*64, ibs.getSpaceUsed_in_bits());
+
+ ibs = new IntBitSet(64);
+ ibs.setOffset(1000);
+ ibs.add(1063);
+ assertEquals(1*64, ibs.getSpaceUsed_in_bits());
+ assertEquals(1,ibs.size());
+
+ ibs = new IntBitSet(6 * 64);
+ ibs.setOffset(1000);
+
+ ibs.add(1000 + 6 * 64 - 1);
+ assertEquals(6*64, ibs.getSpaceUsed_in_bits());
+ ibs.add(1000 + 6 * 64);
+ assertEquals(2,ibs.size());
+ assertEquals(12*64, ibs.getSpaceUsed_in_bits());
+
+ }
+
+ public void testRemove() {
+ ibs.add(15);
+ ibs.add(188);
+ ibs.add(101);
+ ibs.remove(188);
+ assertEquals(101, ibs.getLargestMenber());
+ assertEquals(2,ibs.size());
+ IntListIterator it = ibs.getIterator();
+ assertTrue(it.hasNext());
+ assertEquals(15, it.next());
+ assertTrue(it.hasNext());
+ assertEquals(101, it.next());
+ assertFalse(it.hasNext());
+ assertEquals(3*64, ibs.getSpaceUsed_in_bits());
+
+ }
+
+ public void testContains() {
+ ibs = new IntBitSet();
+ ibs.setOffset(1000);
+
+ ibs.add(1015);
+ ibs.add(1188);
+ assertTrue(ibs.contains(1015));
+ assertFalse(ibs.contains(1187));
+ assertFalse(ibs.contains(1189));
+ assertTrue(ibs.contains(1188));
+ assertEquals(3*64, ibs.getSpaceUsed_in_bits());
+ assertEquals(6, ibs.getSpaceUsed_in_words());
+ assertEquals(2,ibs.size());
+
+ }
+}
Added:
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java?rev=1633890&view=auto
==============================================================================
---
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
(added)
+++
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
Thu Oct 23 17:22:12 2014
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+
+
+public class IntHashSetTest extends TestCase {
+
+ IntHashSet ihs;
+
+ public void setUp() {
+ ihs = new IntHashSet();
+ }
+
+ public void testBasic() {
+
+ ihs.add(15);
+ ihs.add(188);
+ int[] sv = getSortedValues(ihs);
+ assertEquals(2, sv.length);
+ assertEquals(15, sv[0]);
+ assertEquals(188, sv[1]);
+ assertEquals(15, ihs.getMostNegative());
+ assertEquals(188, ihs.getMostPositive());
+
+ // test most positive / negative
+ ihs.clear();
+ ihs.add(189);
+ assertEquals(189, ihs.getMostNegative());
+ assertEquals(189, ihs.getMostPositive());
+ ihs.add(1000);
+ ihs.add(-1000);
+ assertEquals(1000, ihs.getMostPositive());
+ assertEquals(-1000, ihs.getMostNegative());
+ ihs.add(500);
+ ihs.add(-500);
+ assertEquals(1000, ihs.getMostPositive());
+ assertEquals(-1000, ihs.getMostNegative());
+ ihs.remove(1000);
+ assertEquals(1000, ihs.getMostPositive());
+ assertEquals(-1000, ihs.getMostNegative());
+ ihs.add(1001);
+ assertEquals(1001, ihs.getMostPositive());
+ }
+
+ private int[] getSortedValues(IntHashSet s) {
+ IntListIterator it = s.getIterator();
+ int[] r = new int[s.size()];
+ int i = 0;
+ while (it.hasNext()) {
+ r[i++] = it.next();
+ }
+ Arrays.sort(r);
+ return r;
+ }
+
+ public void testContains() {
+ ihs.add(1188);
+ ihs.add(1040);
+ assertTrue(ihs.contains(1188));
+ assertTrue(ihs.contains(1040));
+ assertFalse(ihs.contains(0));
+ assertFalse(ihs.contains(99));
+
+ }
+}
Modified:
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/PositiveIntSetTest.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/PositiveIntSetTest.java?rev=1633890&r1=1633889&r2=1633890&view=diff
==============================================================================
---
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/PositiveIntSetTest.java
(original)
+++
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/PositiveIntSetTest.java
Thu Oct 23 17:22:12 2014
@@ -37,23 +37,101 @@ public class PositiveIntSetTest extends
public void testBasic() {
PositiveIntSet s = new PositiveIntSet();
s.add(128);
+ assertTrue(s.isBitSet);
+ s.add(128128);
assertFalse(s.isBitSet);
- for (int i = 1; i < 10; i++) {
+ IntListIterator it = s.getOrderedIterator();
+ assertTrue(it.hasNext());
+ assertEquals(128, it.next());
+ assertTrue(it.hasNext());
+ assertEquals(128128, it.next());
+ assertFalse(it.hasNext());
+
+ // test offset
+ int bb = 300000;
+ s = new PositiveIntSet();
+ assertTrue(s.useOffset);
+ s.add(bb);
+ s.add(bb);
+ s.add(bb+1);
+ s.add(bb+2);
+
+ assertEquals(3, s.size());
+ assertTrue(s.isBitSet);
+ assertTrue(s.useOffset);
+
+ // test offset converting to hashset
+ s.add(bb - 66);
+ assertEquals(4, s.size());
+ assertFalse(s.isBitSet);
+ it = s.getOrderedIterator();
+ assertEquals(bb-66, it.next());
+ assertEquals(bb, it.next());
+ assertEquals(bb+1, it.next());
+ assertEquals(bb+2, it.next());
+
+
+ bb = 67;
+ s = new PositiveIntSet();
+ assertTrue(s.useOffset);
+ s.add(bb);
+ s.add(bb);
+ s.add(bb+1);
+ s.add(bb+2);
+
+ assertEquals(3, s.size());
+ assertTrue(s.isBitSet);
+ assertTrue(s.useOffset);
+ // test offset converting to bitset with no offset
+ s.add(bb - 66);
+ assertEquals(4, s.size());
+ assertTrue(s.isBitSet);
+ assertFalse(s.useOffset);
+ it = s.getOrderedIterator();
+ assertEquals(bb-66, it.next());
+ assertEquals(bb, it.next());
+ assertEquals(bb+1, it.next());
+ assertEquals(bb+2, it.next());
+
+ // test switch from hash set to bit set
+ s.clear(); // keeps useOffset false
+ s.add(767 - 64); // makes the space used by bit set = 25 words
+
+ for (int i = 1; i < 13; i++) {
s.add(i);
- assertTrue("i is " + i, (i < 4) ? (!s.isBitSet) : s.isBitSet);
+// System.out.println("i is " + i + ", isBitSet = " + s.isBitSet);
+ assertTrue("i is " + i, (i < 12) ? (!s.isBitSet) : s.isBitSet);
}
- for (int i = 10; i < 10000; i = i <<1) {
+ it = s.getOrderedIterator();
+ assertEquals(1,it.next());
+ assertEquals(2,it.next());
+ assertEquals(3,it.next());
+ assertEquals(4,it.next());
+ assertEquals(5,it.next());
+ assertEquals(6,it.next());
+ assertEquals(7,it.next());
+ assertEquals(8,it.next());
+ assertEquals(9,it.next());
+ assertEquals(10,it.next());
+ assertEquals(11,it.next());
+ assertEquals(12,it.next());
+ assertEquals(767-64,it.next());
+
+ boolean reached = false;
+ for (int i = 10; i < 5122; i = i <<1) {
s.add(i); // switches to hash set when i = 2560. == 1010 0000 0000
(>>5 = 101 0000 = 80 (decimal)
// hash set size for 19 entries = 19 * 3 = 57
// bit set size for 2560 = 80 words
// bit set size for prev i (1280 dec) = 101 0000 0000 (>>5 =
10 1000) = 40 words
// if (!s.isBitSet) {
-// System.out.println("# of entries is " + s.size());
+// System.out.println("is Bit set? " + s.isBitSet + ", # of entries is
" + s.size());
// }
- assertTrue((i < 2560) ? s.isBitSet : !s.isBitSet);
+ reached = i >= 5120;
+ assertTrue((!reached) ? s.isBitSet : !s.isBitSet);
}
+ assertTrue(reached);
}
public void testiterators() {
@@ -77,4 +155,5 @@ public class PositiveIntSetTest extends
}
+
}