This is an automated email from the ASF dual-hosted git repository. nightowl888 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit 98c52a8649cbb7e9c6f209556a84c49f59ae3ec8 Author: Shad Storhaug <[email protected]> AuthorDate: Thu Oct 27 01:13:34 2022 +0700 PERFORMANCE: Lucene.Net.Analysis.Util.CharacterUtils: Use spans and stackalloc to reduce heap allocations when lowercasing. Added system property named "maxStackByteLimit" that defaults to 2048 bytes. --- .build/dependencies.props | 1 + .../Analysis/Util/CharacterUtils.cs | 40 +++++++++++++++------- .../Lucene.Net.Analysis.Common.csproj | 8 +++++ .../Configuration/TestConfigurationService.cs | 8 +++++ .../Startup.cs | 3 +- src/Lucene.Net.Tests/Support/TestApiConsistency.cs | 2 +- src/Lucene.Net/Lucene.Net.csproj | 1 + src/Lucene.Net/Util/Constants.cs | 7 +++- 8 files changed, 55 insertions(+), 15 deletions(-) diff --git a/.build/dependencies.props b/.build/dependencies.props index 7a9ad2f27..1cfb3fc69 100644 --- a/.build/dependencies.props +++ b/.build/dependencies.props @@ -73,6 +73,7 @@ <RandomizedTestingGeneratorsPackageVersion>2.7.8</RandomizedTestingGeneratorsPackageVersion> <SharpZipLibPackageVersion>1.1.0</SharpZipLibPackageVersion> <Spatial4nPackageVersion>0.4.1.1</Spatial4nPackageVersion> + <SystemMemoryPackageVersion>4.5.4</SystemMemoryPackageVersion> <SystemReflectionEmitPackageVersion>4.3.0</SystemReflectionEmitPackageVersion> <SystemReflectionEmitILGenerationPackageVersion>4.3.0</SystemReflectionEmitILGenerationPackageVersion> <SystemReflectionTypeExtensionsPackageVersion>4.3.0</SystemReflectionTypeExtensionsPackageVersion> diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs index 8458ca33e..179e095b2 100644 --- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs +++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs @@ -183,10 +183,17 @@ namespace Lucene.Net.Analysis.Util Debugging.Assert(offset <= 0 && offset <= buffer.Length); } - // Slight optimization, eliminating a few method calls internally - 
CultureInfo.InvariantCulture.TextInfo - .ToLower(new string(buffer, offset, length)) - .CopyTo(0, buffer, offset, length); + // Reduce allocations by using the stack and spans + var source = new ReadOnlySpan<char>(buffer, offset, length); + var destination = buffer.AsSpan(offset, length); + var spare = length * sizeof(char) <= Constants.MaxStackByteLimit ? stackalloc char[length] : new char[length]; + source.ToLower(spare, CultureInfo.InvariantCulture); + spare.CopyTo(destination); + + //// Slight optimization, eliminating a few method calls internally + //CultureInfo.InvariantCulture.TextInfo + // .ToLower(new string(buffer, offset, length)) + // .CopyTo(0, buffer, offset, length); //// Optimization provided by Vincent Van Den Berghe: //// http://search-lucene.com/m/Lucene.Net/j1zMf1uckOzOYqsi?subj=Proposal+to+speed+up+implementation+of+LowercaseFilter+charUtils+ToLower @@ -194,8 +201,9 @@ namespace Lucene.Net.Analysis.Util // .ToLowerInvariant() // .CopyTo(0, buffer, offset, length); - // Original (slow) Lucene implementation: - //for (int i = offset; i < limit; ) + //// Original (slow) Lucene implementation: + //int limit = length - offset; + //for (int i = offset; i < limit;) //{ // i += Character.ToChars( // Character.ToLower( @@ -217,10 +225,17 @@ namespace Lucene.Net.Analysis.Util Debugging.Assert(offset <= 0 && offset <= buffer.Length); } - // Slight optimization, eliminating a few method calls internally - CultureInfo.InvariantCulture.TextInfo - .ToUpper(new string(buffer, offset, length)) - .CopyTo(0, buffer, offset, length); + // Reduce 2 heap allocations by using the stack and spans + var source = new ReadOnlySpan<char>(buffer, offset, length); + var destination = buffer.AsSpan(offset, length); + var spare = length * sizeof(char) <= Constants.MaxStackByteLimit ? 
stackalloc char[length] : new char[length]; + source.ToUpper(spare, CultureInfo.InvariantCulture); + spare.CopyTo(destination); + + //// Slight optimization, eliminating a few method calls internally + //CultureInfo.InvariantCulture.TextInfo + // .ToUpper(new string(buffer, offset, length)) + // .CopyTo(0, buffer, offset, length); //// Optimization provided by Vincent Van Den Berghe: //// http://search-lucene.com/m/Lucene.Net/j1zMf1uckOzOYqsi?subj=Proposal+to+speed+up+implementation+of+LowercaseFilter+charUtils+ToLower @@ -228,8 +243,9 @@ namespace Lucene.Net.Analysis.Util // .ToUpperInvariant() // .CopyTo(0, buffer, offset, length); - // Original (slow) Lucene implementation: - //for (int i = offset; i < limit; ) + //// Original (slow) Lucene implementation: + //int limit = length - offset; + //for (int i = offset; i < limit;) //{ // i += Character.ToChars( // Character.ToUpper( diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj index 4fcfaef03..5a64acc9e 100644 --- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj +++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj @@ -50,6 +50,14 @@ <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj" /> </ItemGroup> + <ItemGroup Condition=" '$(TargetFramework)' == 'netstandard2.0' "> + <PackageReference Include="System.Memory" Version="$(SystemMemoryPackageVersion)" /> + </ItemGroup> + + <ItemGroup Condition=" '$(TargetFramework)' == 'net462' "> + <PackageReference Include="System.Memory" Version="$(SystemMemoryPackageVersion)" /> + </ItemGroup> + <ItemGroup Condition=" '$(TargetFramework)' == 'net462' "> <Reference Include="System.Xml" /> </ItemGroup> diff --git a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs index 1090f77ad..a2d662e87 100644 
--- a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs +++ b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs @@ -34,5 +34,13 @@ namespace Lucene.Net.Configuration Assert.AreEqual("barValue", ConfigurationSettings.CurrentConfiguration["bar"]); Assert.AreEqual("bazValue", ConfigurationSettings.CurrentConfiguration["baz"]); } + + [Test] + public void TestCustomMaxStackByteLimit() + { + // This custom value is configured in Startup.cs. + // 5000 chosen because it is not likely to ever be made a default. + Assert.AreEqual(5000, Constants.MaxStackByteLimit); + } } } diff --git a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs index 0edb611cd..e21ad9594 100644 --- a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs +++ b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs @@ -41,7 +41,8 @@ public class Startup : LuceneTestFrameworkInitializer { ["foo"] = "fooValue", ["bar"] = "barValue", - ["baz"] = "bazValue" + ["baz"] = "bazValue", + ["maxStackByteLimit"] = "5000", }); ConfigureServices(serviceCollection, configurationBuilder); IServiceProvider services = serviceCollection.BuildServiceProvider(); diff --git a/src/Lucene.Net.Tests/Support/TestApiConsistency.cs b/src/Lucene.Net.Tests/Support/TestApiConsistency.cs index 8d7bfc57b..6ac4ff2e3 100644 --- a/src/Lucene.Net.Tests/Support/TestApiConsistency.cs +++ b/src/Lucene.Net.Tests/Support/TestApiConsistency.cs @@ -38,7 +38,7 @@ namespace Lucene.Net [TestCase(typeof(Lucene.Net.Analysis.Analyzer))] public override void TestPrivateFieldNames(Type typeFromTargetAssembly) { - base.TestPrivateFieldNames(typeFromTargetAssembly, @"^Lucene\.Net\.Support\.(?:ConcurrentHashSet|PlatformHelper|DateTimeOffsetUtil)|^Lucene\.ExceptionExtensions"); + base.TestPrivateFieldNames(typeFromTargetAssembly, 
@"^Lucene\.Net\.Support\.(?:ConcurrentHashSet|PlatformHelper|DateTimeOffsetUtil)|^Lucene\.ExceptionExtensions|^Lucene\.Net\.Util\.Constants\.MaxStackByteLimit"); } [Test, LuceneNetSpecific] diff --git a/src/Lucene.Net/Lucene.Net.csproj b/src/Lucene.Net/Lucene.Net.csproj index f6a8bfa36..3e70f21c6 100644 --- a/src/Lucene.Net/Lucene.Net.csproj +++ b/src/Lucene.Net/Lucene.Net.csproj @@ -134,6 +134,7 @@ <InternalsVisibleTo Include="Lucene.Net.Tests.Spatial" /> <InternalsVisibleTo Include="Lucene.Net.Tests.Suggest" /> <InternalsVisibleTo Include="Lucene.Net.Tests.TestFramework" /> + <InternalsVisibleTo Include="Lucene.Net.Tests.TestFramework.DependencyInjection" /> </ItemGroup> </Project> diff --git a/src/Lucene.Net/Util/Constants.cs b/src/Lucene.Net/Util/Constants.cs index 60addcacb..4183d7de4 100644 --- a/src/Lucene.Net/Util/Constants.cs +++ b/src/Lucene.Net/Util/Constants.cs @@ -1,4 +1,4 @@ -using System; +using System; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; #if NETFRAMEWORK @@ -30,6 +30,11 @@ namespace Lucene.Net.Util /// </summary> public static class Constants // LUCENENET specific - made static because all members are static and constructor in Lucene was private { + /// <summary> + /// The maximum stack allocation size before switching to making allocations on the heap. + /// </summary> + internal static int MaxStackByteLimit = SystemProperties.GetPropertyAsInt32("maxStackByteLimit", defaultValue: 2048); // LUCENENET specific + // LUCENENET NOTE: IMPORTANT - this line must be placed before RUNTIME_VERSION so it can be parsed. private static readonly Regex VERSION = new Regex(@"(\d+\.\d+(?:\.\d+)?(?:\.\d+)?)", RegexOptions.Compiled);
