This is an automated email from the ASF dual-hosted git repository. nightowl888 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit b4c473e328553439db2ac03c0a56c752b6459163 Author: Shad Storhaug <[email protected]> AuthorDate: Thu Jul 23 09:41:28 2020 +0700 PERFORMANCE: Lucene.Net.TestFramework: Fixed FSTTester to delete while iterating forward instead of using .ElementAt() to iterate in reverse, which takes about 3x longer (see #261) --- src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs | 105 +++++++++++---------- src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs | 17 +--- 2 files changed, 57 insertions(+), 65 deletions(-) diff --git a/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs index 333f253..69c9f7a 100644 --- a/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs +++ b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs @@ -5,9 +5,9 @@ using Lucene.Net.Support; using Lucene.Net.Util.Packed; using System; using System.Collections; +using System.Collections.Concurrent; using System.Collections.Generic; using System.IO; -using System.Linq; using System.Text; using JCG = J2N.Collections.Generic; using Console = Lucene.Net.Util.SystemConsole; @@ -823,8 +823,9 @@ namespace Lucene.Net.Util.Fst // build all prefixes - // LUCENENET: We don't want to use a J2N dictionary here because of performance - IDictionary<Int32sRef, CountMinOutput<T>> prefixes = new Dictionary<Int32sRef, CountMinOutput<T>>(); + // LUCENENET: We use ConcurrentDictionary<TKey, TValue> because Dictionary<TKey, TValue> doesn't support + // deletion while iterating, but ConcurrentDictionary does. + IDictionary<Int32sRef, CountMinOutput<T>> prefixes = new ConcurrentDictionary<Int32sRef, CountMinOutput<T>>(); Int32sRef scratch = new Int32sRef(10); foreach (InputOutput<T> pair in pairs) { @@ -867,69 +868,69 @@ namespace Lucene.Net.Util.Fst Console.WriteLine("TEST: now prune"); } - // prune 'em - // LUCENENET NOTE: Altered this a bit to go in reverse rather than use an enumerator since - // in .NET you cannot delete records while enumerating forward through a dictionary. - for (int i = prefixes.Count - 1; i >= 0; i--) + using (var it = prefixes.GetEnumerator()) { - KeyValuePair<Int32sRef, CountMinOutput<T>> ent = prefixes.ElementAt(i); - Int32sRef prefix = ent.Key; - CountMinOutput<T> cmo = ent.Value; - if (LuceneTestCase.Verbose) - { - Console.WriteLine(" term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal); - } - bool keep; - if (prune1 > 0) + while (it.MoveNext()) { - keep = cmo.Count >= prune1; - } - else - { - Debug.Assert(prune2 > 0); - if (prune2 > 1 && cmo.Count >= prune2) - { - keep = true; - } - else if (prefix.Length > 0) + var ent = it.Current; + Int32sRef prefix = ent.Key; + CountMinOutput<T> cmo = ent.Value; + if (LuceneTestCase.Verbose) { - // consult our parent - scratch.Length = prefix.Length - 1; - Array.Copy(prefix.Int32s, prefix.Offset, scratch.Int32s, 0, scratch.Length); - prefixes.TryGetValue(scratch, out CountMinOutput<T> cmo2); - //System.out.println(" parent count = " + (cmo2 == null ? -1 : cmo2.count)); - keep = cmo2 != null && ((prune2 > 1 && cmo2.Count >= prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1))); + Console.WriteLine(" term prefix=" + InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + cmo.IsLeaf + " output=" + outputs.OutputToString(cmo.Output) + " isFinal=" + cmo.IsFinal); } - else if (cmo.Count >= prune2) + bool keep; + if (prune1 > 0) { - keep = true; + keep = cmo.Count >= prune1; } else { - keep = false; + Debug.Assert(prune2 > 0); + if (prune2 > 1 && cmo.Count >= prune2) + { + keep = true; + } + else if (prefix.Length > 0) + { + // consult our parent + scratch.Length = prefix.Length - 1; + Array.Copy(prefix.Int32s, prefix.Offset, scratch.Int32s, 0, scratch.Length); + keep = prefixes.TryGetValue(scratch, out CountMinOutput<T> cmo2) && cmo2 != null && ((prune2 > 1 && cmo2.Count >= prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1))); + //System.out.println(" parent count = " + (cmo2 == null ? -1 : cmo2.count)); + } + else if (cmo.Count >= prune2) + { + keep = true; + } + else + { + keep = false; + } } - } - if (!keep) - { - prefixes.Remove(prefix); - //System.out.println(" remove"); - } - else - { - // clear isLeaf for all ancestors - //System.out.println(" keep"); - scratch.CopyInt32s(prefix); - scratch.Length--; - while (scratch.Length >= 0) + if (!keep) + { + //it.remove(); + prefixes.Remove(ent); + //System.out.println(" remove"); + } + else { - if (prefixes.TryGetValue(scratch, out CountMinOutput<T> cmo2) && cmo2 != null) + // clear isLeaf for all ancestors + //System.out.println(" keep"); + scratch.CopyInt32s(prefix); + scratch.Length--; + while (scratch.Length >= 0) { - //System.out.println(" clear isLeaf " + inputToString(inputMode, scratch)); - cmo2.IsLeaf = false; + if (prefixes.TryGetValue(scratch, out CountMinOutput<T> cmo2) && cmo2 != null) + { + //System.out.println(" clear isLeaf " + inputToString(inputMode, scratch)); + cmo2.IsLeaf = false; + } + scratch.Length--; } - scratch.Length--; } } } diff --git a/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs b/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs index ecab235..bd051c0 100644 --- a/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs +++ b/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs @@ -268,18 +268,9 @@ namespace Lucene.Net.Util.Fst [Test] - [Slow] public virtual void TestRandomWords() { - // LUCENENET specific: NUnit will crash with an OOM if we do the full test - // with verbosity enabled. So, making this a manual setting that can be - // turned on if, and only if, needed for debugging. If the setting is turned - // on, we are decresing the number of iterations by 9/10, which seems to - // keep it from crashing. - bool isVerbose = false; // Enable manually - int maxNumWords = isVerbose ? 500 : 1000; - - TestRandomWords(maxNumWords, AtLeast(2), isVerbose); + TestRandomWords(1000, AtLeast(2)); //testRandomWords(100, 1); } @@ -295,12 +286,12 @@ namespace Lucene.Net.Util.Fst } } - private void TestRandomWords(int maxNumWords, int numIter, bool VERBOSE) + private void TestRandomWords(int maxNumWords, int numIter) { Random random = new Random(Random.Next()); for (int iter = 0; iter < numIter; iter++) { - if (VERBOSE) + if (Verbose) { Console.WriteLine("\nTEST: iter " + iter); } @@ -323,7 +314,7 @@ namespace Lucene.Net.Util.Fst [Nightly] public virtual void TestBigSet() { - TestRandomWords(TestUtil.NextInt32(Random, 50000, 60000), 1, false); + TestRandomWords(TestUtil.NextInt32(Random, 50000, 60000), 1); } // Build FST for all unique terms in the test line docs
