Lucene.Net.TestFramework: Renamed Util\automaton\ to Util\Automaton\
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/6a55c21f Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/6a55c21f Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/6a55c21f Branch: refs/heads/api-work Commit: 6a55c21f8ea386a68935ca98dc64a60ce03002c3 Parents: 49a0460 Author: Shad Storhaug <[email protected]> Authored: Sun Feb 26 03:39:03 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Mon Feb 27 06:18:00 2017 +0700 ---------------------------------------------------------------------- .../Lucene.Net.TestFramework.csproj | 2 +- .../Util/Automaton/AutomatonTestUtil.cs | 575 +++++++++++++++++++ .../Util/automaton/AutomatonTestUtil.cs | 575 ------------------- 3 files changed, 576 insertions(+), 576 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6a55c21f/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj index 351f632..788338e 100644 --- a/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj +++ b/src/Lucene.Net.TestFramework/Lucene.Net.TestFramework.csproj @@ -434,7 +434,7 @@ <SubType>Code</SubType> </Compile> <Compile Include="Util\ApiScanTestBase.cs" /> - <Compile Include="Util\automaton\AutomatonTestUtil.cs"> + <Compile Include="Util\Automaton\AutomatonTestUtil.cs"> <SubType>Code</SubType> </Compile> <Compile Include="Util\BaseDocIdSetTestCase.cs"> http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6a55c21f/src/Lucene.Net.TestFramework/Util/Automaton/AutomatonTestUtil.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Util/Automaton/AutomatonTestUtil.cs b/src/Lucene.Net.TestFramework/Util/Automaton/AutomatonTestUtil.cs new file mode 100644 index 0000000..2c0e1f9 --- /dev/null +++ b/src/Lucene.Net.TestFramework/Util/Automaton/AutomatonTestUtil.cs @@ -0,0 +1,575 @@ +using Lucene.Net.Support; +using System; +using System.Collections.Generic; +using NUnit.Framework; + +namespace Lucene.Net.Util.Automaton +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Lucene.Net.Randomized.Generators; + + /// <summary> + /// Utilities for testing automata. + /// <p> + /// Capable of generating random regular expressions, + /// and automata, and also provides a number of very + /// basic unoptimized implementations (*slow) for testing. + /// </summary> + public class AutomatonTestUtil + { + /// <summary> + /// Returns random string, including full unicode range. </summary> + public static string RandomRegexp(Random r) + { + while (true) + { + string regexp = RandomRegexpString(r); + // we will also generate some undefined unicode queries + if (!UnicodeUtil.ValidUTF16String(regexp.ToCharArray())) + { + continue; + } + try + { + new RegExp(regexp, RegExp.NONE); + return regexp; + } +#pragma warning disable 168 + catch (Exception e) +#pragma warning restore 168 + { + } + } + } + + private static string RandomRegexpString(Random r) + { + int end = r.Next(20); + if (end == 0) + { + // allow 0 length + return ""; + } + char[] buffer = new char[end]; + for (int i = 0; i < end; i++) + { + int t = r.Next(15); + if (0 == t && i < end - 1) + { + // Make a surrogate pair + // High surrogate + buffer[i++] = (char)TestUtil.NextInt(r, 0xd800, 0xdbff); + // Low surrogate + buffer[i] = (char)TestUtil.NextInt(r, 0xdc00, 0xdfff); + } + else if (t <= 1) + { + buffer[i] = (char)r.Next(0x80); + } + else if (2 == t) + { + buffer[i] = (char)TestUtil.NextInt(r, 0x80, 0x800); + } + else if (3 == t) + { + buffer[i] = (char)TestUtil.NextInt(r, 0x800, 0xd7ff); + } + else if (4 == t) + { + buffer[i] = (char)TestUtil.NextInt(r, 0xe000, 0xffff); + } + else if (5 == t) + { + buffer[i] = '.'; + } + else if (6 == t) + { + buffer[i] = '?'; + } + else if (7 == t) + { + buffer[i] = '*'; + } + else if (8 == t) + { + buffer[i] = '+'; + } + else if (9 == t) + { + buffer[i] = '('; + } + else if (10 == t) + { + buffer[i] = ')'; + } + else if (11 == t) + { + buffer[i] = '-'; + } + else if (12 == t) + { + buffer[i] = '['; + } + else if (13 == t) + { + buffer[i] = ']'; + } + else if (14 == t) + { + buffer[i] = '|'; + } + } + return new string(buffer, 0, end); + } + + /// <summary> + /// picks a random int code point, avoiding surrogates; + /// throws IllegalArgumentException if this transition only + /// accepts surrogates + /// </summary> + private static int GetRandomCodePoint(Random r, Transition t) + { + int code; + if (t.Max < UnicodeUtil.UNI_SUR_HIGH_START || t.Min > UnicodeUtil.UNI_SUR_HIGH_END) + { + // easy: entire range is before or after surrogates + code = t.Min + r.Next(t.Max - t.Min + 1); + } + else if (t.Min >= UnicodeUtil.UNI_SUR_HIGH_START) + { + if (t.Max > UnicodeUtil.UNI_SUR_LOW_END) + { + // after surrogates + code = 1 + UnicodeUtil.UNI_SUR_LOW_END + r.Next(t.Max - UnicodeUtil.UNI_SUR_LOW_END); + } + else + { + throw new System.ArgumentException("transition accepts only surrogates: " + t); + } + } + else if (t.Max <= UnicodeUtil.UNI_SUR_LOW_END) + { + if (t.Min < UnicodeUtil.UNI_SUR_HIGH_START) + { + // before surrogates + code = t.Min + r.Next(UnicodeUtil.UNI_SUR_HIGH_START - t.Min); + } + else + { + throw new System.ArgumentException("transition accepts only surrogates: " + t); + } + } + else + { + // range includes all surrogates + int gap1 = UnicodeUtil.UNI_SUR_HIGH_START - t.Min; + int gap2 = t.Max - UnicodeUtil.UNI_SUR_LOW_END; + int c = r.Next(gap1 + gap2); + if (c < gap1) + { + code = t.Min + c; + } + else + { + code = UnicodeUtil.UNI_SUR_LOW_END + c - gap1 + 1; + } + } + + Assert.True(code >= t.Min && code <= t.Max && (code < UnicodeUtil.UNI_SUR_HIGH_START || code > UnicodeUtil.UNI_SUR_LOW_END), "code=" + code + " min=" + t.Min + " max=" + t.Max); + return code; + } + + /// <summary> + /// Lets you retrieve random strings accepted + /// by an Automaton. + /// <p> + /// Once created, call <seealso cref="#getRandomAcceptedString(Random)"/> + /// to get a new string (in UTF-32 codepoints). + /// </summary> + public class RandomAcceptedStrings + { + internal readonly IDictionary<Transition, bool?> LeadsToAccept; + internal readonly Automaton a; + + private class ArrivingTransition + { + internal readonly State From; + internal readonly Transition t; + + public ArrivingTransition(State from, Transition t) + { + this.From = from; + this.t = t; + } + } + + public RandomAcceptedStrings(Automaton a) + { + this.a = a; + if (!String.IsNullOrEmpty(a.Singleton)) + { + LeadsToAccept = null; + return; + } + + // must use IdentityHashmap because two Transitions w/ + // different start nodes can be considered the same + LeadsToAccept = new IdentityHashMap<Transition, bool?>(); + IDictionary<State, IList<ArrivingTransition>> allArriving = new Dictionary<State, IList<ArrivingTransition>>(); + + LinkedList<State> q = new LinkedList<State>(); + HashSet<State> seen = new HashSet<State>(); + + // reverse map the transitions, so we can quickly look + // up all arriving transitions to a given state + foreach (State s in a.GetNumberedStates()) + { + for (int i = 0; i < s.numTransitions; i++) + { + Transition t = s.TransitionsArray[i]; + IList<ArrivingTransition> tl; + allArriving.TryGetValue(t.Dest, out tl); + if (tl == null) + { + tl = new List<ArrivingTransition>(); + allArriving[t.Dest] = tl; + } + tl.Add(new ArrivingTransition(s, t)); + } + if (s.Accept) + { + q.AddLast(s); + seen.Add(s); + } + } + + // Breadth-first search, from accept states, + // backwards: + while (q.Count > 0) + { + State s = q.First.Value; + q.Remove(s); + IList<ArrivingTransition> arriving; + allArriving.TryGetValue(s, out arriving); + if (arriving != null) + { + foreach (ArrivingTransition at in arriving) + { + State from = at.From; + if (!seen.Contains(from)) + { + q.AddLast(from); + seen.Add(from); + LeadsToAccept[at.t] = true; + } + } + } + } + } + + public int[] GetRandomAcceptedString(Random r) + { + IList<int?> soFar = new List<int?>(); + if (a.IsSingleton) + { + // accepts only one + var s = a.Singleton; + + int charUpto = 0; + while (charUpto < s.Length) + { + int cp = Character.CodePointAt(s, charUpto); + charUpto += Character.CharCount(cp); + soFar.Add(cp); + } + } + else + { + var s = a.GetInitialState(); + + while (true) + { + if (s.Accept) + { + if (s.numTransitions == 0) + { + // stop now + break; + } + else + { + if (r.NextBoolean()) + { + break; + } + } + } + + if (s.numTransitions == 0) + { + throw new Exception("this automaton has dead states"); + } + + bool cheat = r.NextBoolean(); + + Transition t; + if (cheat) + { + // pick a transition that we know is the fastest + // path to an accept state + IList<Transition> toAccept = new List<Transition>(); + for (int i = 0; i < s.numTransitions; i++) + { + Transition t0 = s.TransitionsArray[i]; + if (LeadsToAccept.ContainsKey(t0)) + { + toAccept.Add(t0); + } + } + if (toAccept.Count == 0) + { + // this is OK -- it means we jumped into a cycle + t = s.TransitionsArray[r.Next(s.numTransitions)]; + } + else + { + t = toAccept[r.Next(toAccept.Count)]; + } + } + else + { + t = s.TransitionsArray[r.Next(s.numTransitions)]; + } + soFar.Add(GetRandomCodePoint(r, t)); + s = t.Dest; + } + } + + return ArrayUtil.ToInt32Array(soFar); + } + } + + /// <summary> + /// return a random NFA/DFA for testing </summary> + public static Automaton RandomAutomaton(Random random) + { + // get two random Automata from regexps + Automaton a1 = (new RegExp(AutomatonTestUtil.RandomRegexp(random), RegExp.NONE)).ToAutomaton(); + if (random.NextBoolean()) + { + a1 = BasicOperations.Complement(a1); + } + + Automaton a2 = (new RegExp(AutomatonTestUtil.RandomRegexp(random), RegExp.NONE)).ToAutomaton(); + if (random.NextBoolean()) + { + a2 = BasicOperations.Complement(a2); + } + + // combine them in random ways + switch (random.Next(4)) + { + case 0: + return BasicOperations.Concatenate(a1, a2); + + case 1: + return BasicOperations.Union(a1, a2); + + case 2: + return BasicOperations.Intersection(a1, a2); + + default: + return BasicOperations.Minus(a1, a2); + } + } + + /// <summary> + /// below are original, unoptimized implementations of DFA operations for testing. + /// These are from brics automaton, full license (BSD) below: + /// </summary> + + /* + * dk.brics.automaton + * + * Copyright (c) 2001-2009 Anders Moeller + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + /// <summary> + /// Simple, original brics implementation of Brzozowski minimize() + /// </summary> + public static void MinimizeSimple(Automaton a) + { + if (!String.IsNullOrEmpty(a.Singleton)) + { + return; + } + DeterminizeSimple(a, SpecialOperations.Reverse(a)); + DeterminizeSimple(a, SpecialOperations.Reverse(a)); + } + + /// <summary> + /// Simple, original brics implementation of determinize() + /// </summary> + public static void DeterminizeSimple(Automaton a) + { + if (a.IsDeterministic || a.IsSingleton) + { + return; + } + HashSet<State> initialset = new ValueHashSet<State>(); + initialset.Add(a.initial); + DeterminizeSimple(a, initialset); + } + + /// <summary> + /// Simple, original brics implementation of determinize() + /// Determinizes the given automaton using the given set of initial states. + /// </summary> + public static void DeterminizeSimple(Automaton a, ISet<State> initialset) + { + int[] points = a.GetStartPoints(); + // subset construction + IDictionary<ISet<State>, ISet<State>> sets = new Dictionary<ISet<State>, ISet<State>>(); + LinkedList<ISet<State>> worklist = new LinkedList<ISet<State>>(); + IDictionary<ISet<State>, State> newstate = new Dictionary<ISet<State>, State>(); + sets[initialset] = initialset; + worklist.AddLast(initialset); + a.initial = new State(); + newstate[initialset] = a.initial; + while (worklist.Count > 0) + { + ISet<State> s = worklist.First.Value; + worklist.Remove(s); + State r = newstate[s]; + foreach (State q in s) + { + if (q.Accept) + { + r.Accept = true; + break; + } + } + for (int n = 0; n < points.Length; n++) + { + ISet<State> p = new ValueHashSet<State>(); + foreach (State q in s) + { + foreach (Transition t in q.GetTransitions()) + { + if (t.Min <= points[n] && points[n] <= t.Max) + { + p.Add(t.to); + } + } + } + if (!sets.ContainsKey(p)) + { + sets[p] = p; + worklist.AddLast(p); + newstate[p] = new State(); + } + State q_ = newstate[p]; + int min = points[n]; + int max; + if (n + 1 < points.Length) + { + max = points[n + 1] - 1; + } + else + { + max = Character.MAX_CODE_POINT; + } + r.AddTransition(new Transition(min, max, q_)); + } + } + a.IsDeterministic = true; + a.ClearNumberedStates(); + a.RemoveDeadTransitions(); + } + + /// <summary> + /// Returns true if the language of this automaton is finite. + /// <p> + /// WARNING: this method is slow, it will blow up if the automaton is large. + /// this is only used to test the correctness of our faster implementation. + /// </summary> + public static bool IsFiniteSlow(Automaton a) + { + if (!String.IsNullOrEmpty(a.Singleton)) + { + return true; + } + return IsFiniteSlow(a.GetInitialState(), new HashSet<State>()); + } + + /// <summary> + /// Checks whether there is a loop containing s. (this is sufficient since + /// there are never transitions to dead states.) + /// </summary> + // TODO: not great that this is recursive... in theory a + // large automata could exceed java's stack + private static bool IsFiniteSlow(State s, HashSet<State> path) + { + path.Add(s); + foreach (Transition t in s.GetTransitions()) + { + if (path.Contains(t.Dest) || !IsFiniteSlow(t.Dest, path)) + { + return false; + } + } + path.Remove(s); + return true; + } + + /// <summary> + /// Checks that an automaton has no detached states that are unreachable + /// from the initial state. + /// </summary> + public static void AssertNoDetachedStates(Automaton a) + { + int numStates = a.GetNumberOfStates(); + a.ClearNumberedStates(); // force recomputation of cached numbered states + Assert.True(numStates == a.GetNumberOfStates(), "automaton has " + (numStates - a.GetNumberOfStates()) + " detached states"); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/6a55c21f/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs b/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs deleted file mode 100644 index 2c0e1f9..0000000 --- a/src/Lucene.Net.TestFramework/Util/automaton/AutomatonTestUtil.cs +++ /dev/null @@ -1,575 +0,0 @@ -using Lucene.Net.Support; -using System; -using System.Collections.Generic; -using NUnit.Framework; - -namespace Lucene.Net.Util.Automaton -{ - /* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - using Lucene.Net.Randomized.Generators; - - /// <summary> - /// Utilities for testing automata. - /// <p> - /// Capable of generating random regular expressions, - /// and automata, and also provides a number of very - /// basic unoptimized implementations (*slow) for testing. - /// </summary> - public class AutomatonTestUtil - { - /// <summary> - /// Returns random string, including full unicode range. </summary> - public static string RandomRegexp(Random r) - { - while (true) - { - string regexp = RandomRegexpString(r); - // we will also generate some undefined unicode queries - if (!UnicodeUtil.ValidUTF16String(regexp.ToCharArray())) - { - continue; - } - try - { - new RegExp(regexp, RegExp.NONE); - return regexp; - } -#pragma warning disable 168 - catch (Exception e) -#pragma warning restore 168 - { - } - } - } - - private static string RandomRegexpString(Random r) - { - int end = r.Next(20); - if (end == 0) - { - // allow 0 length - return ""; - } - char[] buffer = new char[end]; - for (int i = 0; i < end; i++) - { - int t = r.Next(15); - if (0 == t && i < end - 1) - { - // Make a surrogate pair - // High surrogate - buffer[i++] = (char)TestUtil.NextInt(r, 0xd800, 0xdbff); - // Low surrogate - buffer[i] = (char)TestUtil.NextInt(r, 0xdc00, 0xdfff); - } - else if (t <= 1) - { - buffer[i] = (char)r.Next(0x80); - } - else if (2 == t) - { - buffer[i] = (char)TestUtil.NextInt(r, 0x80, 0x800); - } - else if (3 == t) - { - buffer[i] = (char)TestUtil.NextInt(r, 0x800, 0xd7ff); - } - else if (4 == t) - { - buffer[i] = (char)TestUtil.NextInt(r, 0xe000, 0xffff); - } - else if (5 == t) - { - buffer[i] = '.'; - } - else if (6 == t) - { - buffer[i] = '?'; - } - else if (7 == t) - { - buffer[i] = '*'; - } - else if (8 == t) - { - buffer[i] = '+'; - } - else if (9 == t) - { - buffer[i] = '('; - } - else if (10 == t) - { - buffer[i] = ')'; - } - else if (11 == t) - { - buffer[i] = '-'; - } - else if (12 == t) - { - buffer[i] = '['; - } - else if (13 == t) - { - buffer[i] = ']'; - } - else if (14 == t) - { - buffer[i] = '|'; - } - } - return new string(buffer, 0, end); - } - - /// <summary> - /// picks a random int code point, avoiding surrogates; - /// throws IllegalArgumentException if this transition only - /// accepts surrogates - /// </summary> - private static int GetRandomCodePoint(Random r, Transition t) - { - int code; - if (t.Max < UnicodeUtil.UNI_SUR_HIGH_START || t.Min > UnicodeUtil.UNI_SUR_HIGH_END) - { - // easy: entire range is before or after surrogates - code = t.Min + r.Next(t.Max - t.Min + 1); - } - else if (t.Min >= UnicodeUtil.UNI_SUR_HIGH_START) - { - if (t.Max > UnicodeUtil.UNI_SUR_LOW_END) - { - // after surrogates - code = 1 + UnicodeUtil.UNI_SUR_LOW_END + r.Next(t.Max - UnicodeUtil.UNI_SUR_LOW_END); - } - else - { - throw new System.ArgumentException("transition accepts only surrogates: " + t); - } - } - else if (t.Max <= UnicodeUtil.UNI_SUR_LOW_END) - { - if (t.Min < UnicodeUtil.UNI_SUR_HIGH_START) - { - // before surrogates - code = t.Min + r.Next(UnicodeUtil.UNI_SUR_HIGH_START - t.Min); - } - else - { - throw new System.ArgumentException("transition accepts only surrogates: " + t); - } - } - else - { - // range includes all surrogates - int gap1 = UnicodeUtil.UNI_SUR_HIGH_START - t.Min; - int gap2 = t.Max - UnicodeUtil.UNI_SUR_LOW_END; - int c = r.Next(gap1 + gap2); - if (c < gap1) - { - code = t.Min + c; - } - else - { - code = UnicodeUtil.UNI_SUR_LOW_END + c - gap1 + 1; - } - } - - Assert.True(code >= t.Min && code <= t.Max && (code < UnicodeUtil.UNI_SUR_HIGH_START || code > UnicodeUtil.UNI_SUR_LOW_END), "code=" + code + " min=" + t.Min + " max=" + t.Max); - return code; - } - - /// <summary> - /// Lets you retrieve random strings accepted - /// by an Automaton. - /// <p> - /// Once created, call <seealso cref="#getRandomAcceptedString(Random)"/> - /// to get a new string (in UTF-32 codepoints). - /// </summary> - public class RandomAcceptedStrings - { - internal readonly IDictionary<Transition, bool?> LeadsToAccept; - internal readonly Automaton a; - - private class ArrivingTransition - { - internal readonly State From; - internal readonly Transition t; - - public ArrivingTransition(State from, Transition t) - { - this.From = from; - this.t = t; - } - } - - public RandomAcceptedStrings(Automaton a) - { - this.a = a; - if (!String.IsNullOrEmpty(a.Singleton)) - { - LeadsToAccept = null; - return; - } - - // must use IdentityHashmap because two Transitions w/ - // different start nodes can be considered the same - LeadsToAccept = new IdentityHashMap<Transition, bool?>(); - IDictionary<State, IList<ArrivingTransition>> allArriving = new Dictionary<State, IList<ArrivingTransition>>(); - - LinkedList<State> q = new LinkedList<State>(); - HashSet<State> seen = new HashSet<State>(); - - // reverse map the transitions, so we can quickly look - // up all arriving transitions to a given state - foreach (State s in a.GetNumberedStates()) - { - for (int i = 0; i < s.numTransitions; i++) - { - Transition t = s.TransitionsArray[i]; - IList<ArrivingTransition> tl; - allArriving.TryGetValue(t.Dest, out tl); - if (tl == null) - { - tl = new List<ArrivingTransition>(); - allArriving[t.Dest] = tl; - } - tl.Add(new ArrivingTransition(s, t)); - } - if (s.Accept) - { - q.AddLast(s); - seen.Add(s); - } - } - - // Breadth-first search, from accept states, - // backwards: - while (q.Count > 0) - { - State s = q.First.Value; - q.Remove(s); - IList<ArrivingTransition> arriving; - allArriving.TryGetValue(s, out arriving); - if (arriving != null) - { - foreach (ArrivingTransition at in arriving) - { - State from = at.From; - if (!seen.Contains(from)) - { - q.AddLast(from); - seen.Add(from); - LeadsToAccept[at.t] = true; - } - } - } - } - } - - public int[] GetRandomAcceptedString(Random r) - { - IList<int?> soFar = new List<int?>(); - if (a.IsSingleton) - { - // accepts only one - var s = a.Singleton; - - int charUpto = 0; - while (charUpto < s.Length) - { - int cp = Character.CodePointAt(s, charUpto); - charUpto += Character.CharCount(cp); - soFar.Add(cp); - } - } - else - { - var s = a.GetInitialState(); - - while (true) - { - if (s.Accept) - { - if (s.numTransitions == 0) - { - // stop now - break; - } - else - { - if (r.NextBoolean()) - { - break; - } - } - } - - if (s.numTransitions == 0) - { - throw new Exception("this automaton has dead states"); - } - - bool cheat = r.NextBoolean(); - - Transition t; - if (cheat) - { - // pick a transition that we know is the fastest - // path to an accept state - IList<Transition> toAccept = new List<Transition>(); - for (int i = 0; i < s.numTransitions; i++) - { - Transition t0 = s.TransitionsArray[i]; - if (LeadsToAccept.ContainsKey(t0)) - { - toAccept.Add(t0); - } - } - if (toAccept.Count == 0) - { - // this is OK -- it means we jumped into a cycle - t = s.TransitionsArray[r.Next(s.numTransitions)]; - } - else - { - t = toAccept[r.Next(toAccept.Count)]; - } - } - else - { - t = s.TransitionsArray[r.Next(s.numTransitions)]; - } - soFar.Add(GetRandomCodePoint(r, t)); - s = t.Dest; - } - } - - return ArrayUtil.ToInt32Array(soFar); - } - } - - /// <summary> - /// return a random NFA/DFA for testing </summary> - public static Automaton RandomAutomaton(Random random) - { - // get two random Automata from regexps - Automaton a1 = (new RegExp(AutomatonTestUtil.RandomRegexp(random), RegExp.NONE)).ToAutomaton(); - if (random.NextBoolean()) - { - a1 = BasicOperations.Complement(a1); - } - - Automaton a2 = (new RegExp(AutomatonTestUtil.RandomRegexp(random), RegExp.NONE)).ToAutomaton(); - if (random.NextBoolean()) - { - a2 = BasicOperations.Complement(a2); - } - - // combine them in random ways - switch (random.Next(4)) - { - case 0: - return BasicOperations.Concatenate(a1, a2); - - case 1: - return BasicOperations.Union(a1, a2); - - case 2: - return BasicOperations.Intersection(a1, a2); - - default: - return BasicOperations.Minus(a1, a2); - } - } - - /// <summary> - /// below are original, unoptimized implementations of DFA operations for testing. - /// These are from brics automaton, full license (BSD) below: - /// </summary> - - /* - * dk.brics.automaton - * - * Copyright (c) 2001-2009 Anders Moeller - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - - /// <summary> - /// Simple, original brics implementation of Brzozowski minimize() - /// </summary> - public static void MinimizeSimple(Automaton a) - { - if (!String.IsNullOrEmpty(a.Singleton)) - { - return; - } - DeterminizeSimple(a, SpecialOperations.Reverse(a)); - DeterminizeSimple(a, SpecialOperations.Reverse(a)); - } - - /// <summary> - /// Simple, original brics implementation of determinize() - /// </summary> - public static void DeterminizeSimple(Automaton a) - { - if (a.IsDeterministic || a.IsSingleton) - { - return; - } - HashSet<State> initialset = new ValueHashSet<State>(); - initialset.Add(a.initial); - DeterminizeSimple(a, initialset); - } - - /// <summary> - /// Simple, original brics implementation of determinize() - /// Determinizes the given automaton using the given set of initial states. - /// </summary> - public static void DeterminizeSimple(Automaton a, ISet<State> initialset) - { - int[] points = a.GetStartPoints(); - // subset construction - IDictionary<ISet<State>, ISet<State>> sets = new Dictionary<ISet<State>, ISet<State>>(); - LinkedList<ISet<State>> worklist = new LinkedList<ISet<State>>(); - IDictionary<ISet<State>, State> newstate = new Dictionary<ISet<State>, State>(); - sets[initialset] = initialset; - worklist.AddLast(initialset); - a.initial = new State(); - newstate[initialset] = a.initial; - while (worklist.Count > 0) - { - ISet<State> s = worklist.First.Value; - worklist.Remove(s); - State r = newstate[s]; - foreach (State q in s) - { - if (q.Accept) - { - r.Accept = true; - break; - } - } - for (int n = 0; n < points.Length; n++) - { - ISet<State> p = new ValueHashSet<State>(); - foreach (State q in s) - { - foreach (Transition t in q.GetTransitions()) - { - if (t.Min <= points[n] && points[n] <= t.Max) - { - p.Add(t.to); - } - } - } - if (!sets.ContainsKey(p)) - { - sets[p] = p; - worklist.AddLast(p); - newstate[p] = new State(); - } - State q_ = newstate[p]; - int min = points[n]; - int max; - if (n + 1 < points.Length) - { - max = points[n + 1] - 1; - } - else - { - max = Character.MAX_CODE_POINT; - } - r.AddTransition(new Transition(min, max, q_)); - } - } - a.IsDeterministic = true; - a.ClearNumberedStates(); - a.RemoveDeadTransitions(); - } - - /// <summary> - /// Returns true if the language of this automaton is finite. - /// <p> - /// WARNING: this method is slow, it will blow up if the automaton is large. - /// this is only used to test the correctness of our faster implementation. - /// </summary> - public static bool IsFiniteSlow(Automaton a) - { - if (!String.IsNullOrEmpty(a.Singleton)) - { - return true; - } - return IsFiniteSlow(a.GetInitialState(), new HashSet<State>()); - } - - /// <summary> - /// Checks whether there is a loop containing s. (this is sufficient since - /// there are never transitions to dead states.) - /// </summary> - // TODO: not great that this is recursive... in theory a - // large automata could exceed java's stack - private static bool IsFiniteSlow(State s, HashSet<State> path) - { - path.Add(s); - foreach (Transition t in s.GetTransitions()) - { - if (path.Contains(t.Dest) || !IsFiniteSlow(t.Dest, path)) - { - return false; - } - } - path.Remove(s); - return true; - } - - /// <summary> - /// Checks that an automaton has no detached states that are unreachable - /// from the initial state. - /// </summary> - public static void AssertNoDetachedStates(Automaton a) - { - int numStates = a.GetNumberOfStates(); - a.ClearNumberedStates(); // force recomputation of cached numbered states - Assert.True(numStates == a.GetNumberOfStates(), "automaton has " + (numStates - a.GetNumberOfStates()) + " detached states"); - } - } -} \ No newline at end of file
