Why not use java.util.BitSet here? It automatically oversizes! ----- Uwe Schindler H.-H.-Meier-Allee 63, D-28213 Bremen http://www.thetaphi.de eMail: [email protected]
> -----Original Message----- > From: [email protected] [mailto:[email protected]] > Sent: Wednesday, June 18, 2014 5:42 PM > To: [email protected] > Subject: svn commit: r1603489 - in > /lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene > /util/automaton: Automaton.java Operations.java > > Author: mikemccand > Date: Wed Jun 18 15:41:49 2014 > New Revision: 1603489 > > URL: http://svn.apache.org/r1603489 > Log: > LUCENE-5752: cutover to FixedBitSet to mark the accept states; improve > javadocs > > Modified: > > lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ > util/automaton/Automaton.java > > lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ > util/automaton/Operations.java > > Modified: > lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ > util/automaton/Automaton.java > URL: > http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/cor > e/src/java/org/apache/lucene/util/automaton/Automaton.java?rev=160348 > 9&r1=1603488&r2=1603489&view=diff > ========================================================== > ==================== > --- > lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ > util/automaton/Automaton.java (original) > +++ > lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucen > +++ e/util/automaton/Automaton.java Wed Jun 18 15:41:49 2014 > @@ -24,6 +24,7 @@ import java.util.HashSet; import java.util.Set; > > import org.apache.lucene.util.ArrayUtil; > +import org.apache.lucene.util.FixedBitSet; > import org.apache.lucene.util.InPlaceMergeSorter; > import org.apache.lucene.util.Sorter; > > @@ -71,7 +72,7 @@ public class Automaton { > /** Holds toState, min, max for each transition. */ > private int[] transitions = new int[6]; > > - private final Set<Integer> acceptStates = new HashSet<Integer>(); > + private FixedBitSet isAccept = new FixedBitSet(4); > > /** True if no state has two transitions leaving with the same label. */ > private boolean deterministic = true; @@ -86,18 +87,23 @@ public class > Automaton { > int state = nextState/2; > states[nextState] = -1; > nextState += 2; > + if (state >= isAccept.length()) { > + FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(state+1, 1)); > + newBits.or(isAccept); > + isAccept = newBits; > + } > return state; > } > > /** Set or clear this state as an accept state. */ > - public void setAccept(int state, boolean isAccept) { > + public void setAccept(int state, boolean accept) { > if (state >= getNumStates()) { > throw new IllegalArgumentException("state=" + state + " is out of > bounds > (numStates=" + getNumStates() + ")"); > } > - if (isAccept) { > - acceptStates.add(state); > + if (accept) { > + isAccept.set(state); > } else { > - acceptStates.remove(state); > + isAccept.clear(state); > } > } > > @@ -119,14 +125,14 @@ public class Automaton { > return transitions; > } > > - /** Returns accept states. */ > - public Set<Integer> getAcceptStates() { > - return acceptStates; > + /** Returns accept states. If the bit is set then that state is an > + accept state. */ FixedBitSet getAcceptStates() { > + return isAccept; > } > > /** Returns true if this state is an accept state. */ > public boolean isAccept(int state) { > - return acceptStates.contains(state); > + return isAccept.get(state); > } > > /** Add a new transition with min = max = label. */ @@ -195,12 +201,19 > @@ public class Automaton { > if (states[nextState+i] != -1) { > states[nextState+i] += nextTransition; > } > - int state = i/2; > } > nextState += other.nextState; > - > - for(int s : other.getAcceptStates()) { > - setAccept(stateOffset+s, true); > + if (isAccept.length() < nextState/2) { > + FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(nextState/2, > 1)); > + newBits.or(isAccept); > + isAccept = newBits; > + } > + int otherNumStates = other.getNumStates(); > + FixedBitSet otherAcceptStates = other.getAcceptStates(); > + int state = 0; > + while (state < otherNumStates && (state = > otherAcceptStates.nextSetBit(state)) != -1) { > + setAccept(stateOffset + state, true); > + state++; > } > > // Bulk copy and then fixup dest for each transition: > > Modified: > lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ > util/automaton/Operations.java > URL: > http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/cor > e/src/java/org/apache/lucene/util/automaton/Operations.java?rev=160348 > 9&r1=1603488&r2=1603489&view=diff > ========================================================== > ==================== > --- > lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ > util/automaton/Operations.java (original) > +++ > lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucen > +++ e/util/automaton/Operations.java Wed Jun 18 15:41:49 2014 > @@ -42,6 +42,7 @@ import java.util.Set; > > import org.apache.lucene.util.ArrayUtil; import > org.apache.lucene.util.BytesRef; > +import org.apache.lucene.util.FixedBitSet; > import org.apache.lucene.util.IntsRef; > import org.apache.lucene.util.RamUsageEstimator; > > @@ -239,7 +240,7 @@ final public class Operations { > b = concatenate(as); > } > > - Set<Integer> prevAcceptStates = new HashSet<>(b.getAcceptStates()); > + Set<Integer> prevAcceptStates = toSet(b, 0); > > for(int i=min;i<max;i++) { > int numStates = b.getNumStates(); @@ -247,16 +248,26 @@ final public > class Operations { > for(int s : prevAcceptStates) { > b.addEpsilon(s, numStates); > } > - prevAcceptStates.clear(); > - for(int s : a.getAcceptStates()) { > - prevAcceptStates.add(numStates+s); > - } > + prevAcceptStates = toSet(a, numStates); > } > > b.finishState(); > > return b; > } > + > + private static Set<Integer> toSet(Automaton a, int offset) { > + int numStates = a.getNumStates(); > + FixedBitSet isAccept = a.getAcceptStates(); > + Set<Integer> result = new HashSet<Integer>(); > + int upto = 0; > + while (upto < numStates && (upto = isAccept.nextSetBit(upto)) != -1) { > + result.add(offset+upto); > + upto++; > + } > + > + return result; > + } > > /** > * Returns a (deterministic) automaton that accepts the complement of the > @@ -913,13 +924,16 @@ final public class Operations { > > LinkedList<Integer> workList = new LinkedList<>(); > BitSet live = new BitSet(numStates); > - for (int s : a.getAcceptStates()) { > + FixedBitSet acceptBits = a.getAcceptStates(); > + int s = 0; > + while (s < numStates && (s = acceptBits.nextSetBit(s)) != -1) { > live.set(s); > workList.add(s); > + s++; > } > > while (workList.isEmpty() == false) { > - int s = workList.removeFirst(); > + s = workList.removeFirst(); > int count = a2.initTransition(s, t); > for(int i=0;i<count;i++) { > a2.getNextTransition(t); > @@ -971,6 +985,7 @@ final public class Operations { > assert hasDeadStates(result) == false; > return result; > } > + > /** > * Finds the largest entry whose value is less than or equal to c, or 0 if > * there is no such entry. > @@ -988,7 +1003,8 @@ final public class Operations { > } > > /** > - * Returns true if the language of this automaton is finite. > + * Returns true if the language of this automaton is finite. The > + * automaton must not have any dead states. > */ > public static boolean isFinite(Automaton a) { > if (a.getNumStates() == 0) { > @@ -998,7 +1014,7 @@ final public class Operations { > } > > /** > - * Checks whether there is a loop containing s. (This is sufficient since > + * Checks whether there is a loop containing state. (This is > + sufficient since > * there are never transitions to dead states.) > */ > // TODO: not great that this is recursive... in theory a @@ -1144,12 > +1160,14 > @@ final public class Operations { > > Automaton result = builder.finish(); > > - for(int s : a.getAcceptStates()) { > - assert s < numStates; > + int s = 0; > + FixedBitSet acceptStates = a.getAcceptStates(); > + while (s < numStates && (s = acceptStates.nextSetBit(s)) != -1) { > result.addEpsilon(0, s+1); > if (initialStates != null) { > initialStates.add(s+1); > } > + s++; > } > > result.finishState(); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
