Why not use java.util.BitSet here? It automatically oversizes!

-----
Uwe Schindler
H.-H.-Meier-Allee 63, D-28213 Bremen
http://www.thetaphi.de
eMail: [email protected]


> -----Original Message-----
> From: [email protected] [mailto:[email protected]]
> Sent: Wednesday, June 18, 2014 5:42 PM
> To: [email protected]
> Subject: svn commit: r1603489 - in
> /lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene
> /util/automaton: Automaton.java Operations.java
> 
> Author: mikemccand
> Date: Wed Jun 18 15:41:49 2014
> New Revision: 1603489
> 
> URL: http://svn.apache.org/r1603489
> Log:
> LUCENE-5752: cutover to FixedBitSet to mark the accept states; improve
> javadocs
> 
> Modified:
> 
> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
> util/automaton/Automaton.java
> 
> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
> util/automaton/Operations.java
> 
> Modified:
> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
> util/automaton/Automaton.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/cor
> e/src/java/org/apache/lucene/util/automaton/Automaton.java?rev=160348
> 9&r1=1603488&r2=1603489&view=diff
> ==========================================================
> ====================
> ---
> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
> util/automaton/Automaton.java (original)
> +++
> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucen
> +++ e/util/automaton/Automaton.java Wed Jun 18 15:41:49 2014
> @@ -24,6 +24,7 @@ import java.util.HashSet;  import java.util.Set;
> 
>  import org.apache.lucene.util.ArrayUtil;
> +import org.apache.lucene.util.FixedBitSet;
>  import org.apache.lucene.util.InPlaceMergeSorter;
>  import org.apache.lucene.util.Sorter;
> 
> @@ -71,7 +72,7 @@ public class Automaton {
>    /** Holds toState, min, max for each transition. */
>    private int[] transitions = new int[6];
> 
> -  private final Set<Integer> acceptStates = new HashSet<Integer>();
> +  private FixedBitSet isAccept = new FixedBitSet(4);
> 
>    /** True if no state has two transitions leaving with the same label. */
>    private boolean deterministic = true; @@ -86,18 +87,23 @@ public class
> Automaton {
>      int state = nextState/2;
>      states[nextState] = -1;
>      nextState += 2;
> +    if (state >= isAccept.length()) {
> +      FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(state+1, 1));
> +      newBits.or(isAccept);
> +      isAccept = newBits;
> +    }
>      return state;
>    }
> 
>    /** Set or clear this state as an accept state. */
> -  public void setAccept(int state, boolean isAccept) {
> +  public void setAccept(int state, boolean accept) {
>      if (state >= getNumStates()) {
>        throw new IllegalArgumentException("state=" + state + " is out of 
> bounds
> (numStates=" + getNumStates() + ")");
>      }
> -    if (isAccept) {
> -      acceptStates.add(state);
> +    if (accept) {
> +      isAccept.set(state);
>      } else {
> -      acceptStates.remove(state);
> +      isAccept.clear(state);
>      }
>    }
> 
> @@ -119,14 +125,14 @@ public class Automaton {
>      return transitions;
>    }
> 
> -  /** Returns accept states. */
> -  public Set<Integer> getAcceptStates() {
> -    return acceptStates;
> +  /** Returns accept states.  If the bit is set then that state is an
> + accept state. */  FixedBitSet getAcceptStates() {
> +    return isAccept;
>    }
> 
>    /** Returns true if this state is an accept state. */
>    public boolean isAccept(int state) {
> -    return acceptStates.contains(state);
> +    return isAccept.get(state);
>    }
> 
>    /** Add a new transition with min = max = label. */ @@ -195,12 +201,19
> @@ public class Automaton {
>        if (states[nextState+i] != -1) {
>          states[nextState+i] += nextTransition;
>        }
> -      int state = i/2;
>      }
>      nextState += other.nextState;
> -
> -    for(int s : other.getAcceptStates()) {
> -      setAccept(stateOffset+s, true);
> +    if (isAccept.length() < nextState/2) {
> +      FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(nextState/2,
> 1));
> +      newBits.or(isAccept);
> +      isAccept = newBits;
> +    }
> +    int otherNumStates = other.getNumStates();
> +    FixedBitSet otherAcceptStates = other.getAcceptStates();
> +    int state = 0;
> +    while (state < otherNumStates && (state =
> otherAcceptStates.nextSetBit(state)) != -1) {
> +      setAccept(stateOffset + state, true);
> +      state++;
>      }
> 
>      // Bulk copy and then fixup dest for each transition:
> 
> Modified:
> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
> util/automaton/Operations.java
> URL:
> http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/cor
> e/src/java/org/apache/lucene/util/automaton/Operations.java?rev=160348
> 9&r1=1603488&r2=1603489&view=diff
> ==========================================================
> ====================
> ---
> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/
> util/automaton/Operations.java (original)
> +++
> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucen
> +++ e/util/automaton/Operations.java Wed Jun 18 15:41:49 2014
> @@ -42,6 +42,7 @@ import java.util.Set;
> 
>  import org.apache.lucene.util.ArrayUtil;  import
> org.apache.lucene.util.BytesRef;
> +import org.apache.lucene.util.FixedBitSet;
>  import org.apache.lucene.util.IntsRef;
>  import org.apache.lucene.util.RamUsageEstimator;
> 
> @@ -239,7 +240,7 @@ final public class Operations {
>        b = concatenate(as);
>      }
> 
> -    Set<Integer> prevAcceptStates = new HashSet<>(b.getAcceptStates());
> +    Set<Integer> prevAcceptStates = toSet(b, 0);
> 
>      for(int i=min;i<max;i++) {
>        int numStates = b.getNumStates(); @@ -247,16 +248,26 @@ final public
> class Operations {
>        for(int s : prevAcceptStates) {
>          b.addEpsilon(s, numStates);
>        }
> -      prevAcceptStates.clear();
> -      for(int s : a.getAcceptStates()) {
> -        prevAcceptStates.add(numStates+s);
> -      }
> +      prevAcceptStates = toSet(a, numStates);
>      }
> 
>      b.finishState();
> 
>      return b;
>    }
> +
> +  private static Set<Integer> toSet(Automaton a, int offset) {
> +    int numStates = a.getNumStates();
> +    FixedBitSet isAccept = a.getAcceptStates();
> +    Set<Integer> result = new HashSet<Integer>();
> +    int upto = 0;
> +    while (upto < numStates && (upto = isAccept.nextSetBit(upto)) != -1) {
> +      result.add(offset+upto);
> +      upto++;
> +    }
> +
> +    return result;
> +  }
> 
>    /**
>     * Returns a (deterministic) automaton that accepts the complement of the
> @@ -913,13 +924,16 @@ final public class Operations {
> 
>      LinkedList<Integer> workList = new LinkedList<>();
>      BitSet live = new BitSet(numStates);
> -    for (int s : a.getAcceptStates()) {
> +    FixedBitSet acceptBits = a.getAcceptStates();
> +    int s = 0;
> +    while (s < numStates && (s = acceptBits.nextSetBit(s)) != -1) {
>        live.set(s);
>        workList.add(s);
> +      s++;
>      }
> 
>      while (workList.isEmpty() == false) {
> -      int s = workList.removeFirst();
> +      s = workList.removeFirst();
>        int count = a2.initTransition(s, t);
>        for(int i=0;i<count;i++) {
>          a2.getNextTransition(t);
> @@ -971,6 +985,7 @@ final public class Operations {
>      assert hasDeadStates(result) == false;
>      return result;
>    }
> +
>    /**
>     * Finds the largest entry whose value is less than or equal to c, or 0 if
>     * there is no such entry.
> @@ -988,7 +1003,8 @@ final public class Operations {
>    }
> 
>    /**
> -   * Returns true if the language of this automaton is finite.
> +   * Returns true if the language of this automaton is finite.  The
> +   * automaton must not have any dead states.
>     */
>    public static boolean isFinite(Automaton a) {
>      if (a.getNumStates() == 0) {
> @@ -998,7 +1014,7 @@ final public class Operations {
>    }
> 
>    /**
> -   * Checks whether there is a loop containing s. (This is sufficient since
> +   * Checks whether there is a loop containing state. (This is
> + sufficient since
>     * there are never transitions to dead states.)
>     */
>    // TODO: not great that this is recursive... in theory a @@ -1144,12 
> +1160,14
> @@ final public class Operations {
> 
>      Automaton result = builder.finish();
> 
> -    for(int s : a.getAcceptStates()) {
> -      assert s < numStates;
> +    int s = 0;
> +    FixedBitSet acceptStates = a.getAcceptStates();
> +    while (s < numStates && (s = acceptStates.nextSetBit(s)) != -1) {
>        result.addEpsilon(0, s+1);
>        if (initialStates != null) {
>          initialStates.add(s+1);
>        }
> +      s++;
>      }
> 
>      result.finishState();



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to