[lucenenet] 09/14: PERFORMANCE: Lucene.Net.Analysis.In.IndicNormalizer: Replaced static constructor with inline LoadScripts() method. Moved location of scripts field to ensure decompositions is initialized first.

nightowl888 Sun, 30 Oct 2022 23:19:16 -0700

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git


commit fca99681e4d47c2d5d48ee562a21e7e73c06e884
Author: Shad Storhaug <[email protected]>
AuthorDate: Tue Oct 25 07:55:03 2022 +0700

    PERFORMANCE: Lucene.Net.Analysis.In.IndicNormalizer: Replaced static
    constructor with inline LoadScripts() method. Moved location of scripts field
    to ensure decompositions is initialized first.
---
 .../Analysis/In/IndicNormalizer.cs                 | 65 ++++++++++++----------
 1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
index 0bddf84e2..10dc257a6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/In/IndicNormalizer.cs
@@ -1,4 +1,4 @@
-// Lucene version compatibility level 4.8.1
+// Lucene version compatibility level 4.8.1
 using Lucene.Net.Analysis.Util;
 using Lucene.Net.Util;
 using System;
@@ -53,18 +53,7 @@ namespace Lucene.Net.Analysis.In
             }
         }
 
-        private static readonly IDictionary<Regex, ScriptData> scripts = new 
Dictionary<Regex, ScriptData>() // LUCENENET: Avoid static constructors (see 
https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)
-        {
-            { new Regex(@"\p{IsDevanagari}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.DEVANAGARI, 0x0900) },
-            { new Regex(@"\p{IsBengali}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.BENGALI, 0x0980) },
-            { new Regex(@"\p{IsGurmukhi}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.GURMUKHI, 0x0A00) },
-            { new Regex(@"\p{IsGujarati}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.GUJARATI, 0x0A80) },
-            { new Regex(@"\p{IsOriya}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.ORIYA, 0x0B00) },
-            { new Regex(@"\p{IsTamil}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.TAMIL, 0x0B80) },
-            { new Regex(@"\p{IsTelugu}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.TELUGU, 0x0C00) },
-            { new Regex(@"\p{IsKannada}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.KANNADA, 0x0C80) },
-            { new Regex(@"\p{IsMalayalam}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.MALAYALAM, 0x0D00) },
-        };
+        // LUCENENET: scripts moved below declaration of decompositions so it 
can be populated inline
 
         [Flags]
         internal enum UnicodeBlock
@@ -80,22 +69,7 @@ namespace Lucene.Net.Analysis.In
             MALAYALAM = 256
         }
 
-        static IndicNormalizer()
-        {
-            foreach (ScriptData sd in scripts.Values)
-            {
-                sd.decompMask = new OpenBitSet(0x7F);
-                for (int i = 0; i < decompositions.Length; i++)
-                {
-                    int ch = decompositions[i][0];
-                    int flags = decompositions[i][4];
-                    if ((flags & (int)sd.flag) != 0)
-                    {
-                        sd.decompMask.Set(ch);
-                    }
-                }
-            }
-        }
+        // LUCENENET: static initialization done inline instead of in 
constructor
 
         /// <summary>
         /// Decompositions according to Unicode 5.2, 
@@ -258,6 +232,39 @@ namespace Lucene.Net.Analysis.In
             new int[] { 0x73, 0x4B,   -1, 0x13, (int)UnicodeBlock.GURMUKHI }
         };
 
+        private static readonly IDictionary<Regex, ScriptData> scripts = 
LoadScripts(); // LUCENENET: Avoid static constructors (see 
https://github.com/apache/lucenenet/pull/224#issuecomment-469284006)
+
+        private static IDictionary<Regex, ScriptData> LoadScripts()
+        {
+            IDictionary<Regex, ScriptData> result = new Dictionary<Regex, 
ScriptData>(capacity: 9)
+            {
+                { new Regex(@"\p{IsDevanagari}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.DEVANAGARI, 0x0900) },
+                { new Regex(@"\p{IsBengali}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.BENGALI, 0x0980) },
+                { new Regex(@"\p{IsGurmukhi}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.GURMUKHI, 0x0A00) },
+                { new Regex(@"\p{IsGujarati}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.GUJARATI, 0x0A80) },
+                { new Regex(@"\p{IsOriya}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.ORIYA, 0x0B00) },
+                { new Regex(@"\p{IsTamil}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.TAMIL, 0x0B80) },
+                { new Regex(@"\p{IsTelugu}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.TELUGU, 0x0C00) },
+                { new Regex(@"\p{IsKannada}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.KANNADA, 0x0C80) },
+                { new Regex(@"\p{IsMalayalam}", RegexOptions.Compiled), new 
ScriptData(UnicodeBlock.MALAYALAM, 0x0D00) },
+            };
+
+            foreach (ScriptData sd in result.Values)
+            {
+                sd.decompMask = new OpenBitSet(0x7F);
+                for (int i = 0; i < decompositions.Length; i++)
+                {
+                    int ch = decompositions[i][0];
+                    int flags = decompositions[i][4];
+                    if ((flags & (int)sd.flag) != 0)
+                    {
+                        sd.decompMask.Set(ch);
+                    }
+                }
+            }
+
+            return result;
+        }
 
         /// <summary>
         /// Normalizes input text, and returns the new length.

[lucenenet] 09/14: PERFORMANCE: Lucene.Net.Analysis.In.IndicNormalizer: Replaced static constructor with inline LoadScripts() method. Moved location of scripts field to ensure decompositions is initialized first.

Reply via email to