This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git

commit b4c473e328553439db2ac03c0a56c752b6459163
Author: Shad Storhaug <[email protected]>
AuthorDate: Thu Jul 23 09:41:28 2020 +0700

    PERFORMANCE: Lucene.Net.TestFramework: Fixed FSTTester to delete while 
iterating forward instead of using .ElementAt() to iterate in reverse, which 
takes about 3x longer (see #261)
---
 src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs | 105 +++++++++++----------
 src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs          |  17 +---
 2 files changed, 57 insertions(+), 65 deletions(-)

diff --git a/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs 
b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
index 333f253..69c9f7a 100644
--- a/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
+++ b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
@@ -5,9 +5,9 @@ using Lucene.Net.Support;
 using Lucene.Net.Util.Packed;
 using System;
 using System.Collections;
+using System.Collections.Concurrent;
 using System.Collections.Generic;
 using System.IO;
-using System.Linq;
 using System.Text;
 using JCG = J2N.Collections.Generic;
 using Console = Lucene.Net.Util.SystemConsole;
@@ -823,8 +823,9 @@ namespace Lucene.Net.Util.Fst
 
             // build all prefixes
 
-            // LUCENENET: We don't want to use a J2N dictionary here because 
of performance
-            IDictionary<Int32sRef, CountMinOutput<T>> prefixes = new 
Dictionary<Int32sRef, CountMinOutput<T>>();
+            // LUCENENET: We use ConcurrentDictionary<TKey, TValue> because 
Dictionary<TKey, TValue> doesn't support
+            // deletion while iterating, but ConcurrentDictionary does.
+            IDictionary<Int32sRef, CountMinOutput<T>> prefixes = new 
ConcurrentDictionary<Int32sRef, CountMinOutput<T>>();
             Int32sRef scratch = new Int32sRef(10);
             foreach (InputOutput<T> pair in pairs)
             {
@@ -867,69 +868,69 @@ namespace Lucene.Net.Util.Fst
                 Console.WriteLine("TEST: now prune");
             }
 
-
             // prune 'em
-            // LUCENENET NOTE: Altered this a bit to go in reverse rather than 
use an enumerator since
-            // in .NET you cannot delete records while enumerating forward 
through a dictionary.
-            for (int i = prefixes.Count - 1; i >= 0; i--)
+            using (var it = prefixes.GetEnumerator())
             {
-                KeyValuePair<Int32sRef, CountMinOutput<T>> ent = 
prefixes.ElementAt(i);
-                Int32sRef prefix = ent.Key;
-                CountMinOutput<T> cmo = ent.Value;
-                if (LuceneTestCase.Verbose)
-                {
-                    Console.WriteLine("  term prefix=" + 
InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + 
cmo.IsLeaf + " output=" + outputs.OutputToString(cmo.Output) + " isFinal=" + 
cmo.IsFinal);
-                }
-                bool keep;
-                if (prune1 > 0)
+                while (it.MoveNext())
                 {
-                    keep = cmo.Count >= prune1;
-                }
-                else
-                {
-                    Debug.Assert(prune2 > 0);
-                    if (prune2 > 1 && cmo.Count >= prune2)
-                    {
-                        keep = true;
-                    }
-                    else if (prefix.Length > 0)
+                    var ent = it.Current;
+                    Int32sRef prefix = ent.Key;
+                    CountMinOutput<T> cmo = ent.Value;
+                    if (LuceneTestCase.Verbose)
                     {
-                        // consult our parent
-                        scratch.Length = prefix.Length - 1;
-                        Array.Copy(prefix.Int32s, prefix.Offset, 
scratch.Int32s, 0, scratch.Length);
-                        prefixes.TryGetValue(scratch, out CountMinOutput<T> 
cmo2);
-                        //System.out.println("    parent count = " + (cmo2 == 
null ? -1 : cmo2.count));
-                        keep = cmo2 != null && ((prune2 > 1 && cmo2.Count >= 
prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1)));
+                        Console.WriteLine("  term prefix=" + 
InputToString(inputMode, prefix, false) + " count=" + cmo.Count + " isLeaf=" + 
cmo.IsLeaf + " output=" + outputs.OutputToString(cmo.Output) + " isFinal=" + 
cmo.IsFinal);
                     }
-                    else if (cmo.Count >= prune2)
+                    bool keep;
+                    if (prune1 > 0)
                     {
-                        keep = true;
+                        keep = cmo.Count >= prune1;
                     }
                     else
                     {
-                        keep = false;
+                        Debug.Assert(prune2 > 0);
+                        if (prune2 > 1 && cmo.Count >= prune2)
+                        {
+                            keep = true;
+                        }
+                        else if (prefix.Length > 0)
+                        {
+                            // consult our parent
+                            scratch.Length = prefix.Length - 1;
+                            Array.Copy(prefix.Int32s, prefix.Offset, 
scratch.Int32s, 0, scratch.Length);
+                            keep = prefixes.TryGetValue(scratch, out 
CountMinOutput<T> cmo2) && cmo2 != null && ((prune2 > 1 && cmo2.Count >= 
prune2) || (prune2 == 1 && (cmo2.Count >= 2 || prefix.Length <= 1)));
+                            //System.out.println("    parent count = " + (cmo2 
== null ? -1 : cmo2.count));
+                        }
+                        else if (cmo.Count >= prune2)
+                        {
+                            keep = true;
+                        }
+                        else
+                        {
+                            keep = false;
+                        }
                     }
-                }
 
-                if (!keep)
-                {
-                    prefixes.Remove(prefix);
-                    //System.out.println("    remove");
-                }
-                else
-                {
-                    // clear isLeaf for all ancestors
-                    //System.out.println("    keep");
-                    scratch.CopyInt32s(prefix);
-                    scratch.Length--;
-                    while (scratch.Length >= 0)
+                    if (!keep)
+                    {
+                        //it.remove();
+                        prefixes.Remove(ent);
+                        //System.out.println("    remove");
+                    }
+                    else
                     {
-                        if (prefixes.TryGetValue(scratch, out 
CountMinOutput<T> cmo2) && cmo2 != null)
+                        // clear isLeaf for all ancestors
+                        //System.out.println("    keep");
+                        scratch.CopyInt32s(prefix);
+                        scratch.Length--;
+                        while (scratch.Length >= 0)
                         {
-                            //System.out.println("    clear isLeaf " + 
inputToString(inputMode, scratch));
-                            cmo2.IsLeaf = false;
+                            if (prefixes.TryGetValue(scratch, out 
CountMinOutput<T> cmo2) && cmo2 != null)
+                            {
+                                //System.out.println("    clear isLeaf " + 
inputToString(inputMode, scratch));
+                                cmo2.IsLeaf = false;
+                            }
+                            scratch.Length--;
                         }
-                        scratch.Length--;
                     }
                 }
             }
diff --git a/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs 
b/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs
index ecab235..bd051c0 100644
--- a/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs
+++ b/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs
@@ -268,18 +268,9 @@ namespace Lucene.Net.Util.Fst
 
 
         [Test]
-        [Slow]
         public virtual void TestRandomWords()
         {
-            // LUCENENET specific: NUnit will crash with an OOM if we do the 
full test
-            // with verbosity enabled. So, making this a manual setting that 
can be
-            // turned on if, and only if, needed for debugging. If the setting 
is turned
-            // on, we are decresing the number of iterations by 9/10, which 
seems to
-            // keep it from crashing.
-            bool isVerbose = false; // Enable manually
-            int maxNumWords = isVerbose ? 500 : 1000;
-          
-            TestRandomWords(maxNumWords, AtLeast(2), isVerbose);
+            TestRandomWords(1000, AtLeast(2));
             //testRandomWords(100, 1);
         }
 
@@ -295,12 +286,12 @@ namespace Lucene.Net.Util.Fst
             }
         }
 
-        private void TestRandomWords(int maxNumWords, int numIter, bool 
VERBOSE)
+        private void TestRandomWords(int maxNumWords, int numIter)
         {
             Random random = new Random(Random.Next());
             for (int iter = 0; iter < numIter; iter++)
             {
-                if (VERBOSE)
+                if (Verbose)
                 {
                     Console.WriteLine("\nTEST: iter " + iter);
                 }
@@ -323,7 +314,7 @@ namespace Lucene.Net.Util.Fst
         [Nightly]
         public virtual void TestBigSet()
         {
-            TestRandomWords(TestUtil.NextInt32(Random, 50000, 60000), 1, 
false);
+            TestRandomWords(TestUtil.NextInt32(Random, 50000, 60000), 1);
         }
 
         // Build FST for all unique terms in the test line docs

Reply via email to