OK I'll switch to BitSet.

Mike McCandless

http://blog.mikemccandless.com


On Wed, Jun 18, 2014 at 11:44 AM, Uwe Schindler <[email protected]> wrote:
> Why not use java.util.BitSet here? It automatically oversizes!
>
> -----
> Uwe Schindler
> H.-H.-Meier-Allee 63, D-28213 Bremen
> http://www.thetaphi.de
> eMail: [email protected]
>
>
>> -----Original Message-----
>> From: [email protected] [mailto:[email protected]]
>> Sent: Wednesday, June 18, 2014 5:42 PM
>> To: [email protected]
>> Subject: svn commit: r1603489 - in
>> /lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene
>> /util/automaton: Automaton.java Operations.java
>>
>> Author: mikemccand
>> Date: Wed Jun 18 15:41:49 2014
>> New Revision: 1603489
>>
>> URL: http://svn.apache.org/r1603489
>> Log:
>> LUCENE-5752: cutover to FixedBitSet to mark the accept states; improve
>> javadocs
>>
>> Modified:
>>
>> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
>> util/automaton/Automaton.java
>>
>> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
>> util/automaton/Operations.java
>>
>> Modified:
>> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
>> util/automaton/Automaton.java
>> URL:
>> http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/cor
>> e/src/java/org/apache/lucene/util/automaton/Automaton.java?rev=160348
>> 9&r1=1603488&r2=1603489&view=diff
>> ==========================================================
>> ====================
>> ---
>> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
>> util/automaton/Automaton.java (original)
>> +++
>> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucen
>> +++ e/util/automaton/Automaton.java Wed Jun 18 15:41:49 2014
>> @@ -24,6 +24,7 @@ import java.util.HashSet;  import java.util.Set;
>>
>>  import org.apache.lucene.util.ArrayUtil;
>> +import org.apache.lucene.util.FixedBitSet;
>>  import org.apache.lucene.util.InPlaceMergeSorter;
>>  import org.apache.lucene.util.Sorter;
>>
>> @@ -71,7 +72,7 @@ public class Automaton {
>>    /** Holds toState, min, max for each transition. */
>>    private int[] transitions = new int[6];
>>
>> -  private final Set<Integer> acceptStates = new HashSet<Integer>();
>> +  private FixedBitSet isAccept = new FixedBitSet(4);
>>
>>    /** True if no state has two transitions leaving with the same label. */
>>    private boolean deterministic = true; @@ -86,18 +87,23 @@ public class
>> Automaton {
>>      int state = nextState/2;
>>      states[nextState] = -1;
>>      nextState += 2;
>> +    if (state >= isAccept.length()) {
>> +      FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(state+1, 1));
>> +      newBits.or(isAccept);
>> +      isAccept = newBits;
>> +    }
>>      return state;
>>    }
>>
>>    /** Set or clear this state as an accept state. */
>> -  public void setAccept(int state, boolean isAccept) {
>> +  public void setAccept(int state, boolean accept) {
>>      if (state >= getNumStates()) {
>>        throw new IllegalArgumentException("state=" + state + " is out of 
>> bounds
>> (numStates=" + getNumStates() + ")");
>>      }
>> -    if (isAccept) {
>> -      acceptStates.add(state);
>> +    if (accept) {
>> +      isAccept.set(state);
>>      } else {
>> -      acceptStates.remove(state);
>> +      isAccept.clear(state);
>>      }
>>    }
>>
>> @@ -119,14 +125,14 @@ public class Automaton {
>>      return transitions;
>>    }
>>
>> -  /** Returns accept states. */
>> -  public Set<Integer> getAcceptStates() {
>> -    return acceptStates;
>> +  /** Returns accept states.  If the bit is set then that state is an
>> + accept state. */  FixedBitSet getAcceptStates() {
>> +    return isAccept;
>>    }
>>
>>    /** Returns true if this state is an accept state. */
>>    public boolean isAccept(int state) {
>> -    return acceptStates.contains(state);
>> +    return isAccept.get(state);
>>    }
>>
>>    /** Add a new transition with min = max = label. */ @@ -195,12 +201,19
>> @@ public class Automaton {
>>        if (states[nextState+i] != -1) {
>>          states[nextState+i] += nextTransition;
>>        }
>> -      int state = i/2;
>>      }
>>      nextState += other.nextState;
>> -
>> -    for(int s : other.getAcceptStates()) {
>> -      setAccept(stateOffset+s, true);
>> +    if (isAccept.length() < nextState/2) {
>> +      FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(nextState/2,
>> 1));
>> +      newBits.or(isAccept);
>> +      isAccept = newBits;
>> +    }
>> +    int otherNumStates = other.getNumStates();
>> +    FixedBitSet otherAcceptStates = other.getAcceptStates();
>> +    int state = 0;
>> +    while (state < otherNumStates && (state =
>> otherAcceptStates.nextSetBit(state)) != -1) {
>> +      setAccept(stateOffset + state, true);
>> +      state++;
>>      }
>>
>>      // Bulk copy and then fixup dest for each transition:
>>
>> Modified:
>> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
>> util/automaton/Operations.java
>> URL:
>> http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/cor
>> e/src/java/org/apache/lucene/util/automaton/Operations.java?rev=160348
>> 9&r1=1603488&r2=1603489&view=diff
>> ==========================================================
>> ====================
>> ---
>> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
>> util/automaton/Operations.java (original)
>> +++
>> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucen
>> +++ e/util/automaton/Operations.java Wed Jun 18 15:41:49 2014
>> @@ -42,6 +42,7 @@ import java.util.Set;
>>
>>  import org.apache.lucene.util.ArrayUtil;  import
>> org.apache.lucene.util.BytesRef;
>> +import org.apache.lucene.util.FixedBitSet;
>>  import org.apache.lucene.util.IntsRef;
>>  import org.apache.lucene.util.RamUsageEstimator;
>>
>> @@ -239,7 +240,7 @@ final public class Operations {
>>        b = concatenate(as);
>>      }
>>
>> -    Set<Integer> prevAcceptStates = new HashSet<>(b.getAcceptStates());
>> +    Set<Integer> prevAcceptStates = toSet(b, 0);
>>
>>      for(int i=min;i<max;i++) {
>>        int numStates = b.getNumStates(); @@ -247,16 +248,26 @@ final public
>> class Operations {
>>        for(int s : prevAcceptStates) {
>>          b.addEpsilon(s, numStates);
>>        }
>> -      prevAcceptStates.clear();
>> -      for(int s : a.getAcceptStates()) {
>> -        prevAcceptStates.add(numStates+s);
>> -      }
>> +      prevAcceptStates = toSet(a, numStates);
>>      }
>>
>>      b.finishState();
>>
>>      return b;
>>    }
>> +
>> +  private static Set<Integer> toSet(Automaton a, int offset) {
>> +    int numStates = a.getNumStates();
>> +    FixedBitSet isAccept = a.getAcceptStates();
>> +    Set<Integer> result = new HashSet<Integer>();
>> +    int upto = 0;
>> +    while (upto < numStates && (upto = isAccept.nextSetBit(upto)) != -1) {
>> +      result.add(offset+upto);
>> +      upto++;
>> +    }
>> +
>> +    return result;
>> +  }
>>
>>    /**
>>     * Returns a (deterministic) automaton that accepts the complement of the
>> @@ -913,13 +924,16 @@ final public class Operations {
>>
>>      LinkedList<Integer> workList = new LinkedList<>();
>>      BitSet live = new BitSet(numStates);
>> -    for (int s : a.getAcceptStates()) {
>> +    FixedBitSet acceptBits = a.getAcceptStates();
>> +    int s = 0;
>> +    while (s < numStates && (s = acceptBits.nextSetBit(s)) != -1) {
>>        live.set(s);
>>        workList.add(s);
>> +      s++;
>>      }
>>
>>      while (workList.isEmpty() == false) {
>> -      int s = workList.removeFirst();
>> +      s = workList.removeFirst();
>>        int count = a2.initTransition(s, t);
>>        for(int i=0;i<count;i++) {
>>          a2.getNextTransition(t);
>> @@ -971,6 +985,7 @@ final public class Operations {
>>      assert hasDeadStates(result) == false;
>>      return result;
>>    }
>> +
>>    /**
>>     * Finds the largest entry whose value is less than or equal to c, or 0 if
>>     * there is no such entry.
>> @@ -988,7 +1003,8 @@ final public class Operations {
>>    }
>>
>>    /**
>> -   * Returns true if the language of this automaton is finite.
>> +   * Returns true if the language of this automaton is finite.  The
>> +   * automaton must not have any dead states.
>>     */
>>    public static boolean isFinite(Automaton a) {
>>      if (a.getNumStates() == 0) {
>> @@ -998,7 +1014,7 @@ final public class Operations {
>>    }
>>
>>    /**
>> -   * Checks whether there is a loop containing s. (This is sufficient since
>> +   * Checks whether there is a loop containing state. (This is
>> + sufficient since
>>     * there are never transitions to dead states.)
>>     */
>>    // TODO: not great that this is recursive... in theory a @@ -1144,12 
>> +1160,14
>> @@ final public class Operations {
>>
>>      Automaton result = builder.finish();
>>
>> -    for(int s : a.getAcceptStates()) {
>> -      assert s < numStates;
>> +    int s = 0;
>> +    FixedBitSet acceptStates = a.getAcceptStates();
>> +    while (s < numStates && (s = acceptStates.nextSetBit(s)) != -1) {
>>        result.addEpsilon(0, s+1);
>>        if (initialStates != null) {
>>          initialStates.add(s+1);
>>        }
>> +      s++;
>>      }
>>
>>      result.finishState();
>
>
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: [email protected]
> For additional commands, e-mail: [email protected]
>

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to