OK I'll switch to BitSet. Mike McCandless
http://blog.mikemccandless.com On Wed, Jun 18, 2014 at 11:44 AM, Uwe Schindler <[email protected]> wrote: > Why not use java.util.BitSet here? It automatically oversizes! > > ----- > Uwe Schindler > H.-H.-Meier-Allee 63, D-28213 Bremen > http://www.thetaphi.de > eMail: [email protected] > > >> -----Original Message----- >> From: [email protected] [mailto:[email protected]] >> Sent: Wednesday, June 18, 2014 5:42 PM >> To: [email protected] >> Subject: svn commit: r1603489 - in >> /lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene >> /util/automaton: Automaton.java Operations.java >> >> Author: mikemccand >> Date: Wed Jun 18 15:41:49 2014 >> New Revision: 1603489 >> >> URL: http://svn.apache.org/r1603489 >> Log: >> LUCENE-5752: cutover to FixedBitSet to mark the accept states; improve >> javadocs >> >> Modified: >> >> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ >> util/automaton/Automaton.java >> >> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ >> util/automaton/Operations.java >> >> Modified: >> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ >> util/automaton/Automaton.java >> URL: >> http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/cor >> e/src/java/org/apache/lucene/util/automaton/Automaton.java?rev=160348 >> 9&r1=1603488&r2=1603489&view=diff >> ========================================================== >> ==================== >> --- >> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ >> util/automaton/Automaton.java (original) >> +++ >> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucen >> +++ e/util/automaton/Automaton.java Wed Jun 18 15:41:49 2014 >> @@ -24,6 +24,7 @@ import java.util.HashSet; import java.util.Set; >> >> import org.apache.lucene.util.ArrayUtil; >> +import org.apache.lucene.util.FixedBitSet; >> import org.apache.lucene.util.InPlaceMergeSorter; >> import org.apache.lucene.util.Sorter; >> >> @@ -71,7 +72,7 @@ public class Automaton { >> /** Holds toState, min, max for each transition. */ >> private int[] transitions = new int[6]; >> >> - private final Set<Integer> acceptStates = new HashSet<Integer>(); >> + private FixedBitSet isAccept = new FixedBitSet(4); >> >> /** True if no state has two transitions leaving with the same label. */ >> private boolean deterministic = true; @@ -86,18 +87,23 @@ public class >> Automaton { >> int state = nextState/2; >> states[nextState] = -1; >> nextState += 2; >> + if (state >= isAccept.length()) { >> + FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(state+1, 1)); >> + newBits.or(isAccept); >> + isAccept = newBits; >> + } >> return state; >> } >> >> /** Set or clear this state as an accept state. */ >> - public void setAccept(int state, boolean isAccept) { >> + public void setAccept(int state, boolean accept) { >> if (state >= getNumStates()) { >> throw new IllegalArgumentException("state=" + state + " is out of >> bounds >> (numStates=" + getNumStates() + ")"); >> } >> - if (isAccept) { >> - acceptStates.add(state); >> + if (accept) { >> + isAccept.set(state); >> } else { >> - acceptStates.remove(state); >> + isAccept.clear(state); >> } >> } >> >> @@ -119,14 +125,14 @@ public class Automaton { >> return transitions; >> } >> >> - /** Returns accept states. */ >> - public Set<Integer> getAcceptStates() { >> - return acceptStates; >> + /** Returns accept states. If the bit is set then that state is an >> + accept state. */ FixedBitSet getAcceptStates() { >> + return isAccept; >> } >> >> /** Returns true if this state is an accept state. */ >> public boolean isAccept(int state) { >> - return acceptStates.contains(state); >> + return isAccept.get(state); >> } >> >> /** Add a new transition with min = max = label. */ @@ -195,12 +201,19 >> @@ public class Automaton { >> if (states[nextState+i] != -1) { >> states[nextState+i] += nextTransition; >> } >> - int state = i/2; >> } >> nextState += other.nextState; >> - >> - for(int s : other.getAcceptStates()) { >> - setAccept(stateOffset+s, true); >> + if (isAccept.length() < nextState/2) { >> + FixedBitSet newBits = new FixedBitSet(ArrayUtil.oversize(nextState/2, >> 1)); >> + newBits.or(isAccept); >> + isAccept = newBits; >> + } >> + int otherNumStates = other.getNumStates(); >> + FixedBitSet otherAcceptStates = other.getAcceptStates(); >> + int state = 0; >> + while (state < otherNumStates && (state = >> otherAcceptStates.nextSetBit(state)) != -1) { >> + setAccept(stateOffset + state, true); >> + state++; >> } >> >> // Bulk copy and then fixup dest for each transition: >> >> Modified: >> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ >> util/automaton/Operations.java >> URL: >> http://svn.apache.org/viewvc/lucene/dev/branches/lucene5752/lucene/cor >> e/src/java/org/apache/lucene/util/automaton/Operations.java?rev=160348 >> 9&r1=1603488&r2=1603489&view=diff >> ========================================================== >> ==================== >> --- >> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucene/ >> util/automaton/Operations.java (original) >> +++ >> lucene/dev/branches/lucene5752/lucene/core/src/java/org/apache/lucen >> +++ e/util/automaton/Operations.java Wed Jun 18 15:41:49 2014 >> @@ -42,6 +42,7 @@ import java.util.Set; >> >> import org.apache.lucene.util.ArrayUtil; import >> org.apache.lucene.util.BytesRef; >> +import org.apache.lucene.util.FixedBitSet; >> import org.apache.lucene.util.IntsRef; >> import org.apache.lucene.util.RamUsageEstimator; >> >> @@ -239,7 +240,7 @@ final public class Operations { >> b = concatenate(as); >> } >> >> - Set<Integer> prevAcceptStates = new HashSet<>(b.getAcceptStates()); >> + Set<Integer> prevAcceptStates = toSet(b, 0); >> >> for(int i=min;i<max;i++) { >> int numStates = b.getNumStates(); @@ -247,16 +248,26 @@ final public >> class Operations { >> for(int s : prevAcceptStates) { >> b.addEpsilon(s, numStates); >> } >> - prevAcceptStates.clear(); >> - for(int s : a.getAcceptStates()) { >> - prevAcceptStates.add(numStates+s); >> - } >> + prevAcceptStates = toSet(a, numStates); >> } >> >> b.finishState(); >> >> return b; >> } >> + >> + private static Set<Integer> toSet(Automaton a, int offset) { >> + int numStates = a.getNumStates(); >> + FixedBitSet isAccept = a.getAcceptStates(); >> + Set<Integer> result = new HashSet<Integer>(); >> + int upto = 0; >> + while (upto < numStates && (upto = isAccept.nextSetBit(upto)) != -1) { >> + result.add(offset+upto); >> + upto++; >> + } >> + >> + return result; >> + } >> >> /** >> * Returns a (deterministic) automaton that accepts the complement of the >> @@ -913,13 +924,16 @@ final public class Operations { >> >> LinkedList<Integer> workList = new LinkedList<>(); >> BitSet live = new BitSet(numStates); >> - for (int s : a.getAcceptStates()) { >> + FixedBitSet acceptBits = a.getAcceptStates(); >> + int s = 0; >> + while (s < numStates && (s = acceptBits.nextSetBit(s)) != -1) { >> live.set(s); >> workList.add(s); >> + s++; >> } >> >> while (workList.isEmpty() == false) { >> - int s = workList.removeFirst(); >> + s = workList.removeFirst(); >> int count = a2.initTransition(s, t); >> for(int i=0;i<count;i++) { >> a2.getNextTransition(t); >> @@ -971,6 +985,7 @@ final public class Operations { >> assert hasDeadStates(result) == false; >> return result; >> } >> + >> /** >> * Finds the largest entry whose value is less than or equal to c, or 0 if >> * there is no such entry. >> @@ -988,7 +1003,8 @@ final public class Operations { >> } >> >> /** >> - * Returns true if the language of this automaton is finite. >> + * Returns true if the language of this automaton is finite. The >> + * automaton must not have any dead states. >> */ >> public static boolean isFinite(Automaton a) { >> if (a.getNumStates() == 0) { >> @@ -998,7 +1014,7 @@ final public class Operations { >> } >> >> /** >> - * Checks whether there is a loop containing s. (This is sufficient since >> + * Checks whether there is a loop containing state. (This is >> + sufficient since >> * there are never transitions to dead states.) >> */ >> // TODO: not great that this is recursive... in theory a @@ -1144,12 >> +1160,14 >> @@ final public class Operations { >> >> Automaton result = builder.finish(); >> >> - for(int s : a.getAcceptStates()) { >> - assert s < numStates; >> + int s = 0; >> + FixedBitSet acceptStates = a.getAcceptStates(); >> + while (s < numStates && (s = acceptStates.nextSetBit(s)) != -1) { >> result.addEpsilon(0, s+1); >> if (initialStates != null) { >> initialStates.add(s+1); >> } >> + s++; >> } >> >> result.finishState(); > > > > --------------------------------------------------------------------- > To unsubscribe, e-mail: [email protected] > For additional commands, e-mail: [email protected] > --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
