This is an automated email from the ASF dual-hosted git repository.
nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
The following commit(s) were added to refs/heads/master by this push:
new 11eca1f53 PERFORMANCE:
Lucene.Net.Facet.Taxonomy.WriterCache.CharBlockArray: Compare equality and
calculate hash code without allocating, as it is completely unnecessary.
11eca1f53 is described below
commit 11eca1f53a9775877e8aca4d4b80b349d1d37a23
Author: Shad Storhaug <[email protected]>
AuthorDate: Fri Jan 19 20:00:32 2024 +0700
PERFORMANCE: Lucene.Net.Facet.Taxonomy.WriterCache.CharBlockArray: Compare
equality and calculate hash code without allocating, as it is completely
unnecessary.
---
.../Taxonomy/WriterCache/CategoryPathUtils.cs | 6 +-
.../Taxonomy/WriterCache/CharBlockArray.cs | 72 ++++++++++++++++++++++
.../Taxonomy/WriterCache/CompactLabelToOrdinal.cs | 3 +-
3 files changed, 78 insertions(+), 3 deletions(-)
diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
index 8dd51fd25..096c6f691 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CategoryPathUtils.cs
@@ -58,7 +58,8 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
for (int i = 0; i < length; i++)
{
int len = charBlockArray[offset++];
- hash = hash * 31 + charBlockArray.Subsequence(offset,
len).GetHashCode(); // LUCENENET: Corrected 2nd Subsequence parameter
+ // LUCENENET specific - calculate the hash code without the
allocation caused by Subsequence
+ hash = hash * 31 + charBlockArray.GetHashCode(offset, len); //
LUCENENET: Corrected 2nd parameter
offset += len;
}
return hash;
@@ -88,7 +89,8 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
return false;
}
- if
(!cp.Components[i].Equals(charBlockArray.Subsequence(offset, len).ToString(),
StringComparison.Ordinal)) // LUCENENET: Corrected 2nd Subsequence parameter
+ // LUCENENET specific - calculate the hash code without the
allocation caused by Subsequence() and ToString()
+ if (!charBlockArray.Equals(offset, len,
cp.Components[i].AsSpan())) // LUCENENET: Corrected 2nd parameter
{
return false;
}
diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
index 8fa8b4097..3331091be 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
@@ -5,6 +5,7 @@ using Lucene.Net.Support.IO;
using System;
using System.Collections.Generic;
using System.IO;
+using System.Runtime.CompilerServices;
using System.Text;
using JCG = J2N.Collections.Generic;
@@ -120,11 +121,13 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
this.blocks.Add(this.current);
}
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
internal virtual int BlockIndex(int index)
{
return index / blockSize;
}
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
internal virtual int IndexInBlock(int index)
{
return index % blockSize;
@@ -317,5 +320,74 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
{
return new CharBlockArray(@in);
}
+
+
+ // LUCENENET specific - Lucene allocated memory using Subsequence and
+ // then called hashCode(), which calculated based on the value of the
subsequence.
+ // However, in .NET this uses the indexer of the StringBuilder that
Subsequence returned,
+ // which is super slow
+ // (see:
https://learn.microsoft.com/en-us/dotnet/api/system.text.stringbuilder.chars).
+ // But this operation doesn't require an allocation at all if we
simply calculate the
+ // value based off of the chars that are in the CharArrayBlock.
+ //
+ // This is a combination of Subsequence(int, int) and the
J2N.Text.CharSequenceComparer.Ordinal.GetHashCode()
+ // implementation. The hash code calculated must be kept in sync with
the J2N implementation
+ // (which originated in Apache Harmony) in order to return the correct
result.
+ internal int GetHashCode(int startIndex, int length)
+ {
+ if (length == 0)
+ return 0;
+ int hash = 0;
+ int remaining = length;
+ int blockIdx = BlockIndex(startIndex);
+ int indexInBlock = IndexInBlock(startIndex);
+ while (remaining > 0)
+ {
+ Block b = blocks[blockIdx++];
+ int numToCheck = Math.Min(remaining, b.length - indexInBlock);
+ int end = indexInBlock + numToCheck;
+ var chars = b.chars;
+ for (int i = indexInBlock; i < end; i++)
+ {
+ // Hash code calculation from J2N/Apache Harmony
+ hash = chars[i] + ((hash << 5) - hash);
+ }
+ remaining -= numToCheck;
+ indexInBlock = 0; // 2nd+ iterations read from start of the
block
+ }
+ return hash;
+ }
+
+ /// <summary>
+ /// Compares a slice of this <see cref="CharBlockArray"/> to <paramref
name="other"/>
+ /// for binary (ordinal) equality. Does not allocate any memory.
+ /// <para/>
+ /// LUCENENET specific.
+ /// </summary>
+ /// <param name="startIndex">The start index of this <see
cref="CharBlockArray"/>.</param>
+ /// <param name="length">The length of characters to compare.</param>
+ /// <param name="other">The other character sequence to check for
equality.</param>
+ /// <returns><c>true</c> if the two character sequences are equal;
otherwise <c>false</c></returns>
+ internal bool Equals(int startIndex, int length, ReadOnlySpan<char>
other)
+ {
+ if (other.Length != length) return false;
+
+ int remaining = length;
+ int blockIdx = BlockIndex(startIndex);
+ int indexInBlock = IndexInBlock(startIndex);
+ int otherIndex = 0;
+ while (remaining > 0)
+ {
+ Block b = blocks[blockIdx++];
+ int numToCheck = Math.Min(remaining, b.length - indexInBlock);
+ var charsToCheck = b.chars.AsSpan(indexInBlock, numToCheck);
+ if (!other.Slice(otherIndex, numToCheck).Equals(charsToCheck,
StringComparison.Ordinal))
+ return false;
+ remaining -= numToCheck;
+ otherIndex += numToCheck;
+ indexInBlock = 0; // 2nd+ iterations read from start of the
block
+ }
+ return true;
+ }
}
}
\ No newline at end of file
diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
index 4b766e93d..2f6ed655b 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CompactLabelToOrdinal.cs
@@ -441,7 +441,8 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
for (int i = 0; i < length; i++)
{
int len = (ushort)l2o.labelRepository[offset++];
- hash = hash * 31 +
l2o.labelRepository.Subsequence(offset, len).GetHashCode(); // LUCENENET:
Corrected 2nd Subsequence parameter
+ // LUCENENET specific - calculate the hash code
without the allocation caused by Subsequence
+ hash = hash * 31 +
l2o.labelRepository.GetHashCode(offset, len); // LUCENENET: Corrected 2nd
parameter
offset += len;
}
}