(lucenenet) branch master updated: PERFORMANCE: Lucene.Net.Facet.Taxonomy.WriterCache.CharBlockArray: Compare equality and calculate hash code without allocating, as it is completely unnecessary.

nightowl888 Fri, 19 Jan 2024 05:57:31 -0800

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git



The following commit(s) were added to refs/heads/master by this push:
     new 11eca1f53 PERFORMANCE: 
Lucene.Net.Facet.Taxonomy.WriterCache.CharBlockArray: Compare equality and 
calculate hash code without allocating, as it is completely unnecessary.
11eca1f53 is described below

commit 11eca1f53a9775877e8aca4d4b80b349d1d37a23
Author: Shad Storhaug <[email protected]>
AuthorDate: Fri Jan 19 20:00:32 2024 +0700

    PERFORMANCE: Lucene.Net.Facet.Taxonomy.WriterCache.CharBlockArray: Compare 
equality and calculate hash code without allocating, as it is completely 
unnecessary.
---
 .../Taxonomy/WriterCache/CategoryPathUtils.cs      |  6 +-
 .../Taxonomy/WriterCache/CharBlockArray.cs         | 72 ++++++++++++++++++++++
 .../Taxonomy/WriterCache/CompactLabelToOrdinal.cs  |  3 +-
 3 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs 
b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
index 8dd51fd25..096c6f691 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
@@ -58,7 +58,8 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
             for (int i = 0; i < length; i++)
             {
                 int len = charBlockArray[offset++];
-                hash = hash * 31 + charBlockArray.Subsequence(offset, 
len).GetHashCode(); // LUCENENET: Corrected 2nd Subsequence parameter
+                // LUCENENET specific - calculate the hash code without the 
allocation caused by Subsequence
+                hash = hash * 31 + charBlockArray.GetHashCode(offset, len); // 
LUCENENET: Corrected 2nd parameter
                 offset += len;
             }
             return hash;
@@ -88,7 +89,8 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
                     return false;
                 }
 
-                if 
(!cp.Components[i].Equals(charBlockArray.Subsequence(offset, len).ToString(), 
StringComparison.Ordinal)) // LUCENENET: Corrected 2nd Subsequence parameter
+                // LUCENENET specific - calculate the hash code without the 
allocation caused by Subsequence() and ToString()
+                if (!charBlockArray.Equals(offset, len, 
cp.Components[i].AsSpan())) // LUCENENET: Corrected 2nd parameter
                 {
                     return false;
                 }
diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs 
b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
index 8fa8b4097..3331091be 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
@@ -5,6 +5,7 @@ using Lucene.Net.Support.IO;
 using System;
 using System.Collections.Generic;
 using System.IO;
+using System.Runtime.CompilerServices;
 using System.Text;
 using JCG = J2N.Collections.Generic;
 
@@ -120,11 +121,13 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
             this.blocks.Add(this.current);
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal virtual int BlockIndex(int index)
         {
             return index / blockSize;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal virtual int IndexInBlock(int index)
         {
             return index % blockSize;
@@ -317,5 +320,74 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
         {
             return new CharBlockArray(@in);
         }
+
+
+        // LUCENENET specific - Lucene allocated memory using Subsequence and
+        // then called hashCode(), which calculated based on the value of the 
subsequence.
+        // However, in .NET this uses the indexer of the StringBuilder that 
Subsequence returned,
+        // which is super slow
+        // (see: 
https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.chars).
+        // But this operation doesn't require an allocation at all if we 
simply calculate the
+        // value based off of the chars that are in the CharArrayBlock.
+        //
+        // This is a combination of Subsequence(int, int) and the 
J2N.Text.CharSequenceComparer.Ordinal.GetHashCode()
+        // implementation. The hash code calculated must be kept in sync with 
the J2N implementation
+        // (which originated in Apache Harmony) in order to return the correct 
result.
+        internal int GetHashCode(int startIndex, int length)
+        {
+            if (length == 0)
+                return 0;
+            int hash = 0;
+            int remaining = length;
+            int blockIdx = BlockIndex(startIndex);
+            int indexInBlock = IndexInBlock(startIndex);
+            while (remaining > 0)
+            {
+                Block b = blocks[blockIdx++];
+                int numToCheck = Math.Min(remaining, b.length - indexInBlock);
+                int end = indexInBlock + numToCheck;
+                var chars = b.chars;
+                for (int i = indexInBlock; i < end; i++)
+                {
+                    // Hash code calculation from J2N/Apache Harmony
+                    hash = chars[i] + ((hash << 5) - hash);
+                }
+                remaining -= numToCheck;
+                indexInBlock = 0; // 2nd+ iterations read from start of the 
block
+            }
+            return hash;
+        }
+
+        /// <summary>
+        /// Compares a slice of this <see cref="CharBlockArray"/> to <paramref 
name="other"/>
+        /// for binary (ordinal) equality. Does not allocate any memory.
+        /// <para/>
+        /// LUCENENET specific.
+        /// </summary>
+        /// <param name="startIndex">The start index of this <see 
cref="CharBlockArray"/>.</param>
+        /// <param name="length">The length of characters to compare.</param>
+        /// <param name="other">The other character sequence to check for 
equality.</param>
+        /// <returns><c>true</c> if the two character sequences are equal; 
otherwise <c>false</c></returns>
+        internal bool Equals(int startIndex, int length, ReadOnlySpan<char> 
other)
+        {
+            if (other.Length != length) return false;
+
+            int remaining = length;
+            int blockIdx = BlockIndex(startIndex);
+            int indexInBlock = IndexInBlock(startIndex);
+            int otherIndex = 0;
+            while (remaining > 0)
+            {
+                Block b = blocks[blockIdx++];
+                int numToCheck = Math.Min(remaining, b.length - indexInBlock);
+                var charsToCheck = b.chars.AsSpan(indexInBlock, numToCheck);
+                if (!other.Slice(otherIndex, numToCheck).Equals(charsToCheck, 
StringComparison.Ordinal))
+                    return false;
+                remaining -= numToCheck;
+                otherIndex += numToCheck;
+                indexInBlock = 0; // 2nd+ iterations read from start of the 
block
+            }
+            return true;
+        }
     }
 }
\ No newline at end of file
diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs 
b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
index 4b766e93d..2f6ed655b 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
@@ -441,7 +441,8 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
                         for (int i = 0; i < length; i++)
                         {
                             int len = (ushort)l2o.labelRepository[offset++];
-                            hash = hash * 31 + 
l2o.labelRepository.Subsequence(offset, len).GetHashCode(); // LUCENENET: 
Corrected 2nd Subsequence parameter
+                            // LUCENENET specific - calculate the hash code 
without the allocation caused by Subsequence
+                            hash = hash * 31 + 
l2o.labelRepository.GetHashCode(offset, len); // LUCENENET: Corrected 2nd 
parameter
                             offset += len;
                         }
                     }

(lucenenet) branch master updated: PERFORMANCE: Lucene.Net.Facet.Taxonomy.WriterCache.CharBlockArray: Compare equality and calculate hash code without allocating, as it is completely unnecessary.

Reply via email to