This is an automated email from the ASF dual-hosted git repository.
paulirwin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
The following commit(s) were added to refs/heads/master by this push:
new 0e54d6617 Implement ISpanAppendable in CharBlockArray and
CharTermAttributeImpl (#1028)
0e54d6617 is described below
commit 0e54d6617c281491077df401a3d3f65134e05dd4
Author: Paul Irwin <[email protected]>
AuthorDate: Tue Nov 19 13:49:13 2024 -0700
Implement ISpanAppendable in CharBlockArray and CharTermAttributeImpl
(#1028)
* Implement ISpanAppendable in CharBlockArray and CharTermAttributeImpl
* Add tests for ReadOnlySpan overloads of CharBlockArray.Append and
CharTermAttributeImpl.Append
---
.../Taxonomy/WriterCache/CharBlockArray.cs | 44 ++++++++++++++++++----
.../Taxonomy/WriterCache/TestCharBlockArray.cs | 32 ++++++++++++++--
.../TokenAttributes/TestCharTermAttributeImpl.cs | 24 ++++++++++++
.../Analysis/TokenAttributes/CharTermAttribute.cs | 18 ++++-----
.../TokenAttributes/CharTermAttributeImpl.cs | 24 ++++++++++--
5 files changed, 117 insertions(+), 25 deletions(-)
diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
index f64c7eab3..7a253a0bd 100644
--- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
+++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs
@@ -35,10 +35,11 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
/// @lucene.experimental
/// </summary>
// LUCENENET NOTE: The serialization features here are strictly for
testing purposes,
- // therefore it doesn't make any difference what type of serialization is
used.
- // To make things simpler, we are using BinaryReader and BinaryWriter
since
+ // therefore it doesn't make any difference what type of serialization is
used.
+ // To make things simpler, we are using BinaryReader and BinaryWriter since
// BinaryFormatter is not implemented in .NET Standard 1.x.
- internal class CharBlockArray : IAppendable, ICharSequence
+ internal class CharBlockArray : IAppendable, ICharSequence,
+ ISpanAppendable /* LUCENENET specific */
{
private const long serialVersionUID = 1L;
@@ -65,8 +66,6 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
return clone;
}
-
-
// LUCENENET specific
public void Serialize(Stream writer)
{
@@ -235,7 +234,6 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
if (startIndex > value.Length - length)
throw new ArgumentOutOfRangeException(nameof(startIndex),
$"Index and length must refer to a location within the string. For example
{nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}.");
-
int offset = startIndex;
int remain = length;
while (remain > 0)
@@ -310,7 +308,6 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
if (startIndex > value.Length - length)
throw new ArgumentOutOfRangeException(nameof(startIndex),
$"Index and length must refer to a location within the string. For example
{nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}.");
-
int offset = startIndex;
int remain = length;
while (remain > 0)
@@ -409,6 +406,32 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
return this;
}
+ public virtual CharBlockArray Append(ReadOnlySpan<char> value)
+ {
+ int offset = 0;
+ int remain = value.Length;
+ while (remain > 0)
+ {
+ if (this.current.length == this.blockSize)
+ {
+ AddBlock();
+ }
+ int toCopy = remain;
+ int remainingInBlock = this.blockSize - this.current.length;
+ if (remainingInBlock < toCopy)
+ {
+ toCopy = remainingInBlock;
+ }
+ value.Slice(offset,
toCopy).CopyTo(this.current.chars.AsSpan(this.current.length));
+ offset += toCopy;
+ remain -= toCopy;
+ this.current.length += toCopy;
+ }
+
+ this.length += value.Length;
+ return this;
+ }
+
#nullable restore
#region IAppendable Members
@@ -431,6 +454,11 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
IAppendable IAppendable.Append(ICharSequence value, int startIndex,
int count) => Append(value, startIndex, count);
+ #endregion
+
+ #region ISpanAppendable Members
+
+ ISpanAppendable ISpanAppendable.Append(ReadOnlySpan<char> value) =>
Append(value);
#endregion
@@ -612,4 +640,4 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
return true;
}
}
-}
\ No newline at end of file
+}
diff --git
a/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs
b/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs
index 8b2c66771..295b01afc 100644
--- a/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs
+++ b/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs
@@ -44,9 +44,9 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
// CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
// .onUnmappableCharacter(CodingErrorAction.REPLACE)
// .onMalformedInput(CodingErrorAction.REPLACE);
- //
- // Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
- // new EncoderReplacementFallback("?"),
+ //
+ // Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage,
+ // new EncoderReplacementFallback("?"),
// new DecoderReplacementFallback("?"));
for (int i = 0; i < n; i++)
@@ -287,6 +287,30 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
Assert.AreEqual("4567890123456" + longTestString, t.ToString());
}
+ [Test]
+ [LuceneNetSpecific]
+ public virtual void TestSpanAppendableInterface()
+ {
+ CharBlockArray t = new CharBlockArray();
+
+ // Test with a span
+ t.Append("12345678".AsSpan());
+ Assert.AreEqual("12345678", t.ToString());
+
+ // test with a span slice
+ t.Append("0123456789".AsSpan(3, 5 - 3));
+ Assert.AreEqual("1234567834", t.ToString());
+
+ // test with a long span
+ t = new CharBlockArray();
+
t.Append("01234567890123456789012345678901234567890123456789".AsSpan());
+
Assert.AreEqual("01234567890123456789012345678901234567890123456789",
t.ToString());
+
+ // test with a long span slice
+
t.Append("01234567890123456789012345678901234567890123456789".AsSpan(3, 50 -
3));
+
Assert.AreEqual("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789",
t.ToString());
+ }
+
private sealed class CharSequenceAnonymousClass : ICharSequence
{
private string longTestString;
@@ -319,4 +343,4 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache
}
}
}
-}
\ No newline at end of file
+}
diff --git
a/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs
b/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs
index c7fe4d28d..2df8210f4 100644
--- a/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs
+++ b/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs
@@ -1,5 +1,6 @@
using J2N.IO;
using J2N.Text;
+using Lucene.Net.Attributes;
using NUnit.Framework;
using System;
using System.Collections.Generic;
@@ -328,6 +329,29 @@ namespace Lucene.Net.Analysis.TokenAttributes
Assert.AreEqual("4567890123456" + longTestString, t.ToString());
}
+ [Test]
+ [LuceneNetSpecific]
+ public virtual void TestSpanAppendableInterface()
+ {
+ CharTermAttribute t = new CharTermAttribute();
+
+ // Test with a span
+ t.Append("12345678".AsSpan());
+ Assert.AreEqual("12345678", t.ToString());
+
+ // test with a span slice
+ t.Append("0123456789".AsSpan(3, 5 - 3));
+ Assert.AreEqual("1234567834", t.ToString());
+
+ // test with a long span
+
t.SetEmpty().Append("01234567890123456789012345678901234567890123456789".AsSpan());
+
Assert.AreEqual("01234567890123456789012345678901234567890123456789",
t.ToString());
+
+ // test with a long span slice
+
t.Append("01234567890123456789012345678901234567890123456789".AsSpan(3, 50 -
3));
+
Assert.AreEqual("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789",
t.ToString());
+ }
+
private sealed class CharSequenceAnonymousClass : ICharSequence
{
private readonly string longTestString;
diff --git a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs
b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs
index e15b1517d..fa3cede77 100644
--- a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs
+++ b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs
@@ -43,7 +43,7 @@ namespace Lucene.Net.Analysis.TokenAttributes
/// you can then directly alter. If the array is too
/// small for your token, use <see cref="ResizeBuffer(int)"/>
/// to increase it. After
- /// altering the buffer be sure to call <see cref="SetLength(int)"/>
+ /// altering the buffer be sure to call <see cref="SetLength(int)"/>
/// to record the number of valid
/// characters that were placed into the termBuffer.
/// <para>
@@ -76,15 +76,15 @@ namespace Lucene.Net.Analysis.TokenAttributes
/// the termBuffer array. Use this to truncate the termBuffer
/// or to synchronize with external manipulation of the termBuffer.
/// Note: to grow the size of the array,
- /// use <see cref="ResizeBuffer(int)"/> first.
- /// NOTE: This is exactly the same operation as calling the <see
cref="Length"/> setter, the primary
+ /// use <see cref="ResizeBuffer(int)"/> first.
+ /// NOTE: This is exactly the same operation as calling the <see
cref="Length"/> setter, the primary
/// difference is that this method returns a reference to the current
object so it can be chained.
/// <code>
/// obj.SetLength(30).Append("hey you");
/// </code>
/// </summary>
/// <param name="length"> the truncated length </param>
- ICharTermAttribute SetLength(int length);
+ ICharTermAttribute SetLength(int length);
/// <summary>
/// Sets the length of the termBuffer to zero.
@@ -197,8 +197,8 @@ namespace Lucene.Net.Analysis.TokenAttributes
/// </summary>
/// <param name="value">The sequence of characters to append.</param>
/// <remarks>
- /// LUCENENET specific method, added because the .NET <see
cref="string"/> data type
- /// doesn't implement <see cref="ICharSequence"/>.
+ /// LUCENENET specific method, added because the .NET <see
cref="string"/> data type
+ /// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(string value);
@@ -228,8 +228,8 @@ namespace Lucene.Net.Analysis.TokenAttributes
/// <paramref name="startIndex"/> + <paramref name="count"/> is
greater than the length of <paramref name="value"/>.
/// </exception>
/// <remarks>
- /// LUCENENET specific method, added because the .NET <see
cref="string"/> data type
- /// doesn't implement <see cref="ICharSequence"/>.
+ /// LUCENENET specific method, added because the .NET <see
cref="string"/> data type
+ /// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(string value, int startIndex, int
count); // LUCENENET TODO: API - change to startIndex/length to match .NET
@@ -270,7 +270,7 @@ namespace Lucene.Net.Analysis.TokenAttributes
/// <paramref name="startIndex"/> + <paramref name="count"/> is
greater than the length of <paramref name="value"/>.
/// </exception>
/// <remarks>
- /// LUCENENET specific method, added because the .NET <see
cref="StringBuilder"/> data type
+ /// LUCENENET specific method, added because the .NET <see
cref="StringBuilder"/> data type
/// doesn't implement <see cref="ICharSequence"/>.
/// </remarks>
new ICharTermAttribute Append(StringBuilder value, int startIndex, int
count);
diff --git a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs
b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs
index bd04b96d0..8ab9aca19 100644
--- a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs
+++ b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs
@@ -34,7 +34,8 @@ namespace Lucene.Net.Analysis.TokenAttributes
/// <summary>
/// Default implementation of <see cref="ICharTermAttribute"/>. </summary>
- public class CharTermAttribute : Attribute, ICharTermAttribute,
ITermToBytesRefAttribute, IAppendable // LUCENENET specific: Not implementing
ICloneable per Microsoft's recommendation
+ public class CharTermAttribute : Attribute, ICharTermAttribute,
ITermToBytesRefAttribute, IAppendable, // LUCENENET specific: Not implementing
ICloneable per Microsoft's recommendation
+ ISpanAppendable /* LUCENENET specific */
{
private const int MIN_BUFFER_SIZE = 10;
@@ -85,7 +86,7 @@ namespace Lucene.Net.Analysis.TokenAttributes
{
// Not big enough; create a new array with slight
// over allocation and preserve content
-
+
// LUCENENET: Resize rather than copy
Array.Resize(ref termBuffer, ArrayUtil.Oversize(newSize,
RamUsageEstimator.NUM_BYTES_CHAR));
}
@@ -196,7 +197,6 @@ namespace Lucene.Net.Analysis.TokenAttributes
// *** Appendable interface ***
-
public CharTermAttribute Append(string value, int startIndex, int
charCount)
{
// LUCENENET: Changed semantics to be the same as the
StringBuilder in .NET
@@ -358,6 +358,17 @@ namespace Lucene.Net.Analysis.TokenAttributes
return this;
}
+ public CharTermAttribute Append(ReadOnlySpan<char> value)
+ {
+ if (value.Length == 0)
+ return this;
+
+ value.CopyTo(InternalResizeBuffer(termLength +
value.Length).AsSpan(termLength));
+ Length += value.Length;
+
+ return this;
+ }
+
private char[] InternalResizeBuffer(int length)
{
if (termBuffer.Length < length)
@@ -524,7 +535,12 @@ namespace Lucene.Net.Analysis.TokenAttributes
IAppendable IAppendable.Append(ICharSequence value, int startIndex,
int count) => Append(value, startIndex, count);
+ #endregion
+
+ #region ISpanAppendable Members
+
+ ISpanAppendable ISpanAppendable.Append(ReadOnlySpan<char> value) =>
Append(value);
#endregion
}
-}
\ No newline at end of file
+}