Author: schor
Date: Thu Oct 23 17:22:12 2014
New Revision: 1633890

URL: http://svn.apache.org/r1633890
Log:
[UIMA-4060] improve int set impls, fix one bug in PositiveIntSet, add test cases

Added:
    
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java
    
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
Modified:
    
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java
    
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
    
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java
    
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/PositiveIntSetTest.java

Modified: 
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java?rev=1633890&r1=1633889&r2=1633890&view=diff
==============================================================================
--- 
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java
 (original)
+++ 
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntBitSet.java
 Thu Oct 23 17:22:12 2014
@@ -31,25 +31,73 @@ import java.util.NoSuchElementException;
  *   
  * This impl is for use in a single thread case only
  * 
+ * This impl supports offset, to let this bit set store items in a range n -> 
n + small number
+ * 
+ * If using offset, you must add ints in a range equal to or above the offset
+ * 
  */
 public class IntBitSet {
     
   private final BitSet set;
   
-  public IntBitSet() {
-    this(16);  
+  private int size = 0;  // tracked here to avoid slow counting of cardinality
+  
+  private int offset = 0;
+  
+  /**
+   * Sets the lowest int value that can be kept; trying to add a lower one 
throws an error
+   * <ul>
+   *   <li>Allows for a more compact representation, potentially</li>
+   *   <li>only can be set if the size is 0</li>
+   * </ul>
+   * @param offset - the lowest value that can be kept in the set. 0 is 
allowed.
+   */
+  public void setOffset(int offset) {
+    if (size != 0) {
+      throw new IllegalStateException("Cannot set offset unless set is empty");
+    }
+    if (offset < 0) {
+      throw new IllegalArgumentException("Offset must be 0 or a positive int, 
was " + offset);
+    }
+    this.offset = offset;
+  }
+  /**
+   * @return the current offset
+   */
+  public int getOffset() {
+    return offset;
   }
   
-  public IntBitSet(int initialCapacity) {
-    set = new BitSet(initialCapacity);
+  /**
+   * Construct an IntBitSet capable of holding ints from 0 to 63, (perhaps 
plus an offset)
+   */
+  public IntBitSet() {
+    this(63);  // in current java impls, this is the minimum 1 long int is 
allocated  
+  }
+    
+  /**
+   * Construct an IntBitSet capable of holding ints from 0 to maxInt (perhaps 
plus an offset)
+   * @param maxInt the biggest int (perhaps plus an offset) that can be held 
without growing the space
+   */
+  public IntBitSet(int maxInt) {
+    set = new BitSet(Math.max(1, maxInt));
   }
   
+  /**
+   * empty the IntBitSet
+   */
   public void clear() {
-    set.clear();    
+    set.clear();
+    size = 0;
   }
    
+  /**
+   * 
+   * @param key
+   * @return
+   */
   public boolean contains(int key) {
-    return (key == 0) ? false : set.get(key);
+    return (key == 0) ? false : set.get(key - offset);
   }
  
   /**
@@ -57,10 +105,19 @@ public class IntBitSet {
    * @param key
    * @return true if this set did not already contain the specified element
    */
-  public boolean add(int key) {
+  public boolean add(int original_key) {
+    if (original_key < offset) {
+      throw new IllegalArgumentException("key must be positive, but was " + 
original_key);
+    }
+    
+    final int key = original_key - offset;
     final boolean prev = set.get(key);
-    set.set(key);;
-    return prev == false;
+    set.set(key);
+    if (!prev) {
+      size ++;
+      return true;
+    }
+    return false;
   }
   
   /**
@@ -68,30 +125,44 @@ public class IntBitSet {
    * @param key -
    * @return true if this key was removed, false if not present
    */
-  public boolean remove(int key) {
+  public boolean remove(int original_key) {
+    final int key = original_key - offset;
     final boolean prev = set.get(key);
     set.clear(key);
-    return prev;
+    if (prev) {
+      size --;
+      return true;
+    }
+    return false;
   }
 
+  /**
+   * 
+   * @return the number of elements in this set
+   */
   public int size() {
-    return set.cardinality();
+    return size;    // bit set cardinality() is slow
+  }
+  
+  public int getSpaceUsed_in_bits() {
+    return set.size();
   }
    
   /**
    * 
    * @return space used in 32 bit words
    */
-  public int getSpaceUsed() {
-    return set.length() >> 5;  // divide by 32
+  public int getSpaceUsed_in_words() {
+    return getSpaceUsed_in_bits() >> 5;  // divide by 32
   }
   
   /**
    * 
-   * @return largest int stored plus 1, or 0 if no ints in table
+   * @return largest int in the set
+   * If the set has no members, 0 is returned
    */
-  public int getLargestIntP1() {
-    return set.length();
+  public int getLargestMenber() {
+    return set.length() - 1 + offset;
   }
   
   private class IntBitSetIterator implements IntListIterator {
@@ -103,7 +174,7 @@ public class IntBitSet {
     }
 
     public final boolean hasNext() {
-      return (curKey > 0 && curKey < set.length());
+      return (curKey >= 0);
     }
 
     public final int next() {
@@ -112,7 +183,7 @@ public class IntBitSet {
       }
       final int r = curKey;
       curKey = set.nextSetBit(curKey + 1);
-      return r;
+      return r + offset;
     }
 
     /**

Modified: 
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java?rev=1633890&r1=1633889&r2=1633890&view=diff
==============================================================================
--- 
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
 (original)
+++ 
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/IntHashSet.java
 Thu Oct 23 17:22:12 2014
@@ -25,7 +25,8 @@ import java.util.NoSuchElementException;
 import org.apache.uima.jcas.impl.JCasHashMap;
 
 /**
- * A set of non-zero ints.  
+ * A set of non-zero ints. 
+ *   0 reserved to indicate "not in the map"  
  *   
  * based on Int2IntHashMap
  * This impl is for use in a single thread case only
@@ -62,7 +63,8 @@ public class IntHashSet {
   
   private boolean secondTimeShrinkable = false;
   
-  private int mostPositive = 0;
+  private int mostPositive = Integer.MIN_VALUE;
+  private int mostNegative = Integer.MAX_VALUE;
 
   public IntHashSet() {
     this(16);
@@ -92,8 +94,10 @@ public class IntHashSet {
    size++;
   }
   
+  public int getSpaceUsedInWords() {
+    return keys.length + 8;  // 8 is overhead for this class excluding any 
Java object overhead
+  }
   
-
   private void increaseTableCapacity() {
     final int [] oldKeys = keys;
     final int oldCapacity = oldKeys.length;
@@ -150,6 +154,8 @@ public class IntHashSet {
     size = 0;
     Arrays.fill(keys, 0);
     resetHistogram();
+    mostPositive = Integer.MIN_VALUE;
+    mostNegative = Integer.MAX_VALUE;
   }
 
   /** 
@@ -205,8 +211,18 @@ public class IntHashSet {
    * @return true if this set did not already contain the specified element
    */
   public boolean add(int key) {
-    if (key > mostPositive) {
-      mostPositive = key;
+    if (key == 0) {
+      throw new IllegalArgumentException("0 not allowed as a key to add");
+    }
+    if (size == 0) {
+      mostPositive = mostNegative = key;
+    } else {
+      if (key > mostPositive) {
+        mostPositive = key;
+      }
+      if (key < mostNegative) {
+        mostNegative = key;
+      }
     }
     final int i = find(key);
     if (keys[i] == 0) {
@@ -228,7 +244,10 @@ public class IntHashSet {
   }
 
   /**
-   * 
+   * mostPositive and mostNegative are not updated
+   *   for removes.  So these values may be inaccurate,
+   *   but mostPositive is always >= actual most positive,
+   *   and mostNegative is always <= actual most negative.
    * @param key -
    * @return true if the key was present
    */
@@ -246,9 +265,23 @@ public class IntHashSet {
     return size;
   }
   
+  /**
+   * 
+   * @return a value that is >= the actual most positive value in the table.
+   *   it will be == unless a remove operation has removed a most positive 
value
+   */
   public int getMostPositive() {
     return mostPositive;
   }
+  
+  /**
+   * 
+   * @return a value that is <= the actual least positive value in the table.
+   *   It will be == unless remove operations has removed a least positive 
value.
+   */
+  public int getMostNegative() {
+    return mostNegative;
+  }
    
   public void showHistogram() {
     if (TUNE) {

Modified: 
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java?rev=1633890&r1=1633889&r2=1633890&view=diff
==============================================================================
--- 
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java
 (original)
+++ 
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/PositiveIntSet.java
 Thu Oct 23 17:22:12 2014
@@ -27,20 +27,31 @@ import java.util.NoSuchElementException;
 /**
  * An set of non-zero ints.
  * 
- *   Uses two different representations depending on the ratio of the max int 
stored and the total number of ints stored.
- * 
+ *   Uses 2 1/2 different representations depending on the ratio of the max 
int stored and the total number of ints stored.
+ *     For large sparse ints, it uses the IntHashSet
+ *     
+ *     For more dense ints, it uses bitSets or bitSets with an offset
+ *     
+ *     It switches when adding new members if the other representation is 
sufficiently smaller (using a Hysteresis of 16 words) 
  */
 public class PositiveIntSet {
   
   // number of words a inthashset has to be better than intbitset to cause 
switch
-  private static final int HYSTERESIS = 4;
+  private static final int HYSTERESIS = 16;
 
-  IntBitSet intBitSet;
+  private IntBitSet intBitSet;
   
-  IntHashSet intHashSet;
+  private IntHashSet intHashSet;
   
+  //package private for test case
   boolean isBitSet;  
   
+  /**
+   * Set false once we find we have to reduce the bit offset
+   */
+  //package private for test case
+  boolean useOffset = true;
+  
   public PositiveIntSet() {
     intBitSet = new IntBitSet();
     isBitSet = true;
@@ -62,7 +73,7 @@ public class PositiveIntSet {
     IntListIterator it = intHashSet.getIterator();
     int i = 0;
     while (it.hasNext()) {
-      allValues[i] = it.next();
+      allValues[i++] = it.next();
     }
     Arrays.sort(allValues);
     return new IntListIteratorOverArray(allValues);
@@ -132,7 +143,7 @@ public class PositiveIntSet {
  
   /**
    * 
-   * @param key
+   * @param key -
    * @return true if this set did not already contain the specified element
    */
   public boolean add(int key) {
@@ -143,6 +154,33 @@ public class PositiveIntSet {
       return intHashSet.add(key);
     }
   }
+
+  /**
+   * When a key is added which is lower than the offset, we need to adjust the 
whole int set table.
+   * Although an array copy could do this, we don't have access to the bitset 
impl.
+   * 
+   * So we just set the offset to 0, and either convert the whole thing to a 
inthashset or copy all the bits to a new
+   * bit set.
+   * 
+   * @param key
+   */
+  private void adjustBitSetForLowerOffset(int key) {
+    final int largestInt = intBitSet.getLargestMenber();
+    final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(largestInt);  // 
doesn't use offset
+    final int hashSetSpaceNeeded = getHashSetSpace(intBitSet.size());
+    
+    if (hashSetSpaceNeeded < (bitSetSpaceNeeded - HYSTERESIS)) {
+      switchToHashSet(size());
+    } else {
+      IntBitSet bs = new IntBitSet(largestInt);
+      IntListIterator it = intBitSet.getIterator();
+      while (it.hasNext()) {
+        bs.add(it.next());
+      }
+      intBitSet = bs;      
+    }
+    useOffset = false;  // stop using because we have evidence it isn't 
appropriate
+  }
   
   public void add(int[] vs) {
     for (int i : vs) {
@@ -155,8 +193,7 @@ public class PositiveIntSet {
       return intBitSet.remove(key);
     } else {
       return intHashSet.remove(key);
-    }
-    
+    }   
   }
 
   public int size() {
@@ -186,6 +223,24 @@ public class PositiveIntSet {
     return a;
   }
   
+  /** 
+   * number of 32 bit words in use by bit set, given the max key (less offset)
+   * @param maxKeyLessOffset
+   * @return number of words needed to store the highest value
+   */
+  private int getBitSetSpaceFromMaxInt(int maxKeyLessOffset) {
+    // e.g., 31 key => 1 word, 32 key => 2 words
+    return 2 + (maxKeyLessOffset >> 5) + 2;  // add 2 because the bits are 
stored in 2 words (1 long), and 2 more for IntBitSet object overhead
+  }
+  
+  private int getHashSetSpace(int numberOfEntries) {
+    long v = numberOfEntries * 3 + 8;  // plus 8 is for the overhead of the 
intHashSet object.
+    if (v > Integer.MAX_VALUE) {
+      throw new RuntimeException("value overflowed int, orig was " + 
numberOfEntries);
+    }
+    return (int) v;   // the number of 32 bit words needed is 1.5 to 3; 3 is 
worst case
+  }
+  
   /**
    * logic for switching representation
    * 
@@ -198,51 +253,50 @@ public class PositiveIntSet {
    *    switch to HashSet from BitSet if:
    *      HashSet space   <   BitSet space  
    *       size * 12            MaxInt / 8
-   *    with hysteresis                              1 2 4 8 16 32 64 128 256 
512 1024 2048 4096 8192 16k 32k 64k 128k 256k 512k 1m 2m 4m 8m 16m
-   *       Don't switch unless space saving is         1 2 3  4  5  6  7   8   
9   10   11   12   13   14  15  16  17   18   19  20 21 22 23 24
-   *           intHashSet
-   *          size    space         savings
-   *            1       12         32768   
-   *            1024   12288       32768     
-   *            16384  196608      100000    1/2 of space
-   *            
-   *          max(32768, 1/2 of hash-map space). 
+   *    with hysteresis  
    *
    *   switch to BitSet from HashSet if:
    *      BitSet used when its space requirement is less than hashset, with 
hysteresis:
-   *       Don't switch  unless space saving is max(32768, 1/4 of hash-map 
space)
-   *           intHashSet
-   *          size    space         savings
-   *            1       12         no switch   
-   *            1024   12288       no switch     
-   *            16384  196608      100000    1/2 of space
    *           
    */
   
   private void maybeSwitchRepresentation(int key) {
-    
     if (isBitSet) {
       /********************
        *    Bit Set 
        ********************/
-      final int maxKeyP1 = intBitSet.getLargestIntP1(); // p1 = plus 1
-      if (key < maxKeyP1) {
+      
+      /*********
+       * Size is 0, 
+       * use this opportunity to set the offset
+       * unless that has been a bad strategy for this 
+       * instance already 
+       ********/
+      if (intBitSet.size() == 0 && useOffset) {
+        // initialize bitSetOffset to the key, minus some amount to allow a 
bit of previous values
+        intBitSet.setOffset(Math.max(0, key - 63));
+        // because of offset, won't have to switch to hash for this key
+        return;
+      }
+      
+      final int offset = intBitSet.getOffset();
+      
+      if (key < offset) {
+        adjustBitSetForLowerOffset(key);
         return;
       }
-      // space used in words (32 bit words)
-      int bitSetSpace = 1 + (Math.max((maxKeyP1 - 1), key) >> 5);  // space in 
words
-      // keep bit set unless key would grow it beyond 
-//      if (bitSetSpace < 16 || key < bitSetSpace) {
-//        return;
-//      }
       
-      // maybe switch to IntHashSet
-      final int size = size();
+      // return if key fits in existing allocation
+      if ((key - offset) < intBitSet.getSpaceUsed_in_bits()) {
+        return;
+      }
+
       // space used in words (32 bit words)
-      long hashsetSpace = (size << 1) + size;  // size * 3 , 1.5 - 3 words per 
entry in IntHashSet
+      final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(key - offset);
+      final int hashSetSpaceNeeded = getHashSetSpace(intBitSet.size());
       // switch if hashmap space plus hysteresis would be < bitmap space
-      if (hashsetSpace < (bitSetSpace - HYSTERESIS)) {
-        switchToHashSet(size);
+      if (hashSetSpaceNeeded < (bitSetSpaceNeeded - HYSTERESIS)) {
+        switchToHashSet(intBitSet.size());
       }
       return;    
     } 
@@ -251,23 +305,21 @@ public class PositiveIntSet {
      *    Hash Set 
      ********************/
     // maybe switch to IntBitSet
-    final int size = size();
     // space used in words (32 bit words)
-    long hashsetSpace = (size << 1) + size;  // size * 3 , 1.5 - 3 words per 
entry in IntHashSet
+    // don't bother to include the new element - it might not be "added"
+    //   if it was already there, and only serves to decrease hysteresis a bit
+    
+    final int hashSetSpaceNeeded = getHashSetSpace(size());    
     
     final int maxInt = intHashSet.getMostPositive();
-    final int bitSetSpace = bitSetSpace(maxInt);
+    final int offset = useOffset ? intHashSet.getMostNegative() : 0;
+    final int bitSetSpaceNeeded = getBitSetSpaceFromMaxInt(maxInt - offset);
  
-    if (bitSetSpace < (hashsetSpace - HYSTERESIS)) {
-      switchToBitSet(maxInt);
+    if (bitSetSpaceNeeded < (hashSetSpaceNeeded - HYSTERESIS)) {
+      switchToBitSet(maxInt, offset);
     }  
   }
-  
-  private int bitSetSpace(int maxInt) {
-    // 0 - 31 : 1;  32-63: 2
-    return 1 + (maxInt >> 5);
-  }
-  
+    
   private void switchToHashSet(int size) {
     isBitSet = false;
     intHashSet = new IntHashSet(size);
@@ -278,13 +330,15 @@ public class PositiveIntSet {
     intBitSet = null;
   }
   
-  private void switchToBitSet(int maxInt) {
+  private void switchToBitSet(int maxInt, int offset) {
     isBitSet = true;
-    intBitSet = new IntBitSet(maxInt);
+    intBitSet = new IntBitSet(maxInt - offset);
+    intBitSet.setOffset(offset);
     IntListIterator it = intHashSet.getIterator();
     while (it.hasNext()) {
       intBitSet.add(it.next());
     }
+    intHashSet = null;
   }
    
 }

Added: 
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java?rev=1633890&view=auto
==============================================================================
--- 
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java
 (added)
+++ 
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntBitSetTest.java
 Thu Oct 23 17:22:12 2014
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import junit.framework.TestCase;
+
+
+public class IntBitSetTest extends TestCase {
+  
+  IntBitSet ibs;
+  IntBitSet ibs1k;
+  
+  public void setUp() {
+    ibs = new IntBitSet();
+    ibs1k = new IntBitSet();
+    ibs1k.setOffset(1000);
+  }
+
+  public void testBasic() {
+    
+    ibs.add(15);
+    ibs.add(188);
+    
+    IntListIterator it = ibs.getIterator();    
+    assertTrue(it.hasNext());
+    assertEquals(15, it.next());
+    assertTrue(it.hasNext());
+    assertEquals(188, it.next());
+    assertFalse(it.hasNext());
+    assertEquals(3*64, ibs.getSpaceUsed_in_bits());
+    assertEquals(6, ibs.getSpaceUsed_in_words());
+    
+    ibs = ibs1k;
+    
+    ibs.add(1015);
+    ibs.add(1188);
+    it = ibs.getIterator();    
+    assertTrue(it.hasNext());
+    assertEquals(1015, it.next());
+    assertTrue(it.hasNext());
+    assertEquals(1188, it.next());
+    assertFalse(it.hasNext());
+    assertEquals(2,ibs.size());
+    
+    assertEquals(3*64, ibs.getSpaceUsed_in_bits());
+    assertEquals(6, ibs.getSpaceUsed_in_words());
+    
+    ibs = new IntBitSet(64);
+    ibs.setOffset(1000);
+    ibs.add(1064);
+    assertEquals(1,ibs.size());
+    assertEquals(2*64, ibs.getSpaceUsed_in_bits());
+    
+    ibs = new IntBitSet(64);
+    ibs.setOffset(1000);
+    ibs.add(1063);
+    assertEquals(1*64, ibs.getSpaceUsed_in_bits());
+    assertEquals(1,ibs.size());
+
+    ibs = new IntBitSet(6 * 64);
+    ibs.setOffset(1000);
+
+    ibs.add(1000 + 6 * 64 - 1);
+    assertEquals(6*64, ibs.getSpaceUsed_in_bits());
+    ibs.add(1000 + 6 * 64);
+    assertEquals(2,ibs.size());
+    assertEquals(12*64, ibs.getSpaceUsed_in_bits());
+    
+  }
+  
+  public void testRemove() {
+    ibs.add(15);
+    ibs.add(188);
+    ibs.add(101);
+    ibs.remove(188);
+    assertEquals(101, ibs.getLargestMenber());
+    assertEquals(2,ibs.size());
+    IntListIterator it = ibs.getIterator();    
+    assertTrue(it.hasNext());
+    assertEquals(15, it.next());
+    assertTrue(it.hasNext());
+    assertEquals(101, it.next());
+    assertFalse(it.hasNext());
+    assertEquals(3*64, ibs.getSpaceUsed_in_bits());
+    
+  }
+  
+  public void testContains() {
+    ibs = new IntBitSet();
+    ibs.setOffset(1000);
+    
+    ibs.add(1015);
+    ibs.add(1188);
+    assertTrue(ibs.contains(1015));
+    assertFalse(ibs.contains(1187));
+    assertFalse(ibs.contains(1189));
+    assertTrue(ibs.contains(1188));
+    assertEquals(3*64, ibs.getSpaceUsed_in_bits());
+    assertEquals(6, ibs.getSpaceUsed_in_words());
+    assertEquals(2,ibs.size());
+    
+  }
+}

Added: 
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java?rev=1633890&view=auto
==============================================================================
--- 
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
 (added)
+++ 
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/IntHashSetTest.java
 Thu Oct 23 17:22:12 2014
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.internal.util;
+
+import java.util.Arrays;
+
+import junit.framework.TestCase;
+
+
+public class IntHashSetTest extends TestCase {
+  
+  IntHashSet ihs;
+  
+  public void setUp() {
+    ihs = new IntHashSet();
+  }
+
+  public void testBasic() {
+    
+    ihs.add(15);
+    ihs.add(188);
+    int[] sv = getSortedValues(ihs);
+    assertEquals(2, sv.length);
+    assertEquals(15, sv[0]);
+    assertEquals(188, sv[1]);
+    assertEquals(15, ihs.getMostNegative());
+    assertEquals(188, ihs.getMostPositive());
+    
+    // test most positive / negative
+    ihs.clear();
+    ihs.add(189);
+    assertEquals(189, ihs.getMostNegative());
+    assertEquals(189, ihs.getMostPositive());
+    ihs.add(1000);
+    ihs.add(-1000);
+    assertEquals(1000, ihs.getMostPositive());
+    assertEquals(-1000, ihs.getMostNegative());
+    ihs.add(500);
+    ihs.add(-500);
+    assertEquals(1000, ihs.getMostPositive());
+    assertEquals(-1000, ihs.getMostNegative());
+    ihs.remove(1000);
+    assertEquals(1000, ihs.getMostPositive());
+    assertEquals(-1000, ihs.getMostNegative());
+    ihs.add(1001);    
+    assertEquals(1001, ihs.getMostPositive());
+  }
+  
+  private int[] getSortedValues(IntHashSet s) {
+    IntListIterator it = s.getIterator();
+    int[] r = new int[s.size()];
+    int i = 0;
+    while (it.hasNext()) {
+      r[i++] = it.next();
+    }
+    Arrays.sort(r);
+    return r;
+  }
+  
+  public void testContains() {
+    ihs.add(1188);
+    ihs.add(1040);
+    assertTrue(ihs.contains(1188));
+    assertTrue(ihs.contains(1040));
+    assertFalse(ihs.contains(0));
+    assertFalse(ihs.contains(99));
+    
+  }
+}

Modified: 
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/PositiveIntSetTest.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/PositiveIntSetTest.java?rev=1633890&r1=1633889&r2=1633890&view=diff
==============================================================================
--- 
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/PositiveIntSetTest.java
 (original)
+++ 
uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/internal/util/PositiveIntSetTest.java
 Thu Oct 23 17:22:12 2014
@@ -37,23 +37,101 @@ public class PositiveIntSetTest extends 
   public void testBasic() {
     PositiveIntSet s = new PositiveIntSet();
     s.add(128);
+    assertTrue(s.isBitSet);
+    s.add(128128);
     assertFalse(s.isBitSet);
     
-    for (int i = 1; i < 10; i++) {
+    IntListIterator it = s.getOrderedIterator();
+    assertTrue(it.hasNext());
+    assertEquals(128, it.next());
+    assertTrue(it.hasNext());
+    assertEquals(128128, it.next());
+    assertFalse(it.hasNext());
+
+    // test offset
+    int bb = 300000;
+    s = new PositiveIntSet();
+    assertTrue(s.useOffset);
+    s.add(bb);
+    s.add(bb);
+    s.add(bb+1);
+    s.add(bb+2);
+    
+    assertEquals(3, s.size());
+    assertTrue(s.isBitSet);
+    assertTrue(s.useOffset);
+    
+    // test offset converting to hashset
+    s.add(bb - 66);
+    assertEquals(4, s.size());
+    assertFalse(s.isBitSet);
+    it = s.getOrderedIterator();
+    assertEquals(bb-66, it.next());
+    assertEquals(bb, it.next());
+    assertEquals(bb+1, it.next());
+    assertEquals(bb+2, it.next());
+
+    
+    bb = 67;
+    s = new PositiveIntSet();
+    assertTrue(s.useOffset);
+    s.add(bb);
+    s.add(bb);
+    s.add(bb+1);
+    s.add(bb+2);
+    
+    assertEquals(3, s.size());
+    assertTrue(s.isBitSet);
+    assertTrue(s.useOffset);
+    // test offset converting to bitset with no offset    
+    s.add(bb - 66);
+    assertEquals(4, s.size());
+    assertTrue(s.isBitSet);
+    assertFalse(s.useOffset);
+    it = s.getOrderedIterator();
+    assertEquals(bb-66, it.next());
+    assertEquals(bb, it.next());
+    assertEquals(bb+1, it.next());
+    assertEquals(bb+2, it.next());
+    
+    // test switch from hash set to bit set
+    s.clear();  // keeps useOffset false
+    s.add(767 - 64);  // makes the space used by bit set = 25 words
+    
+    for (int i = 1; i < 13; i++) {
       s.add(i);
-      assertTrue("i is " + i, (i < 4) ? (!s.isBitSet) : s.isBitSet);
+//      System.out.println("i is " + i + ", isBitSet = " + s.isBitSet);
+      assertTrue("i is " + i, (i < 12) ? (!s.isBitSet) : s.isBitSet);
     }
     
-    for (int i = 10; i < 10000; i = i <<1) {
+    it = s.getOrderedIterator();
+    assertEquals(1,it.next());
+    assertEquals(2,it.next());
+    assertEquals(3,it.next());
+    assertEquals(4,it.next());
+    assertEquals(5,it.next());
+    assertEquals(6,it.next());
+    assertEquals(7,it.next());
+    assertEquals(8,it.next());
+    assertEquals(9,it.next());
+    assertEquals(10,it.next());
+    assertEquals(11,it.next());
+    assertEquals(12,it.next());
+    assertEquals(767-64,it.next());
+    
+    boolean reached = false;
+    for (int i = 10; i < 5122; i = i <<1) {
       s.add(i);  // switches to hash set when i = 2560. == 1010 0000 0000  
(>>5 = 101 0000 = 80 (decimal)
                  // hash set size for 19 entries = 19 * 3 = 57
                  // bit set size for 2560 = 80 words
                  // bit set size for prev i (1280 dec) = 101 0000 0000 (>>5 = 
10 1000) = 40 words
 //      if (!s.isBitSet) {
-//        System.out.println("# of entries is " + s.size());
+//        System.out.println("is Bit set? " + s.isBitSet + ", # of entries is 
" + s.size());
 //      }
-      assertTrue((i < 2560) ? s.isBitSet : !s.isBitSet);
+      reached = i >= 5120;
+      assertTrue((!reached) ? s.isBitSet : !s.isBitSet);
     }
+    assertTrue(reached);
   }
   
   public void testiterators() {
@@ -77,4 +155,5 @@ public class PositiveIntSetTest extends 
     
   }
   
+  
 }


Reply via email to