This is an automated email from the ASF dual-hosted git repository.
nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
The following commit(s) were added to refs/heads/master by this push:
new 2d4d332ca Fixed encoding provider loading on .NET Framework (for .NET
Standard 2.0 target) (#1036)
2d4d332ca is described below
commit 2d4d332cae21cda0e44e62fac0e7296f78229b50
Author: Shad Storhaug <[email protected]>
AuthorDate: Wed Nov 20 01:37:27 2024 +0700
Fixed encoding provider loading on .NET Framework (for .NET Standard 2.0
target) (#1036)
* Lucene.Net.Analysis.Ja.Tools.ConnectionCostsWriter: Added using for
Lucene.Net.Support
* Lucene.Net.Analysis.Kuromoji + Lucene.Net.Analysis.SmartCn: Added
EncodingProviderInitializer classes to ensure we don't load the encoding
provider on a .NET Framework runtime when targeting netstandard2.0 (fixes #1025)
---
.../JapaneseTokenizerFactory.cs | 7 +--
.../Support/Util/EncodingProviderInitializer.cs | 52 ++++++++++++++++++++++
.../Tools/ConnectionCostsWriter.cs | 3 +-
.../Tools/DictionaryBuilder.cs | 10 ++---
src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs | 7 +--
.../Support/Util/EncodingProviderInitializer.cs | 52 ++++++++++++++++++++++
src/Lucene.Net.Tests.Analysis.Common/Startup.cs | 7 ++-
7 files changed, 120 insertions(+), 18 deletions(-)
diff --git a/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs
b/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs
index e1a183e57..5595375e4 100644
--- a/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Kuromoji/JapaneseTokenizerFactory.cs
@@ -74,11 +74,8 @@ namespace Lucene.Net.Analysis.Ja
static JapaneseTokenizerFactory()
{
-#if FEATURE_ENCODINGPROVIDERS
- // Support for EUC-JP encoding. See:
https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
- var encodingProvider =
System.Text.CodePagesEncodingProvider.Instance;
- System.Text.Encoding.RegisterProvider(encodingProvider);
-#endif
+ // LUCENENET: Support for EUC-JP encoding. See:
https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
+ EncodingProviderInitializer.EnsureInitialized();
}
public virtual void Inform(IResourceLoader loader)
diff --git
a/src/Lucene.Net.Analysis.Kuromoji/Support/Util/EncodingProviderInitializer.cs
b/src/Lucene.Net.Analysis.Kuromoji/Support/Util/EncodingProviderInitializer.cs
new file mode 100644
index 000000000..b2ba1ce6c
--- /dev/null
+++
b/src/Lucene.Net.Analysis.Kuromoji/Support/Util/EncodingProviderInitializer.cs
@@ -0,0 +1,52 @@
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Text;
+using System.Threading;
+
+namespace Lucene.Net.Util
+{
+ /// <summary>
+ /// Loads the <see cref="System.Text.EncodingProvider"/> for the current
runtime for support of
+ /// EUC-JP encoding.
+ /// </summary>
+ internal static class EncodingProviderInitializer
+ {
+ private static int initialized;
+
+ private static bool IsNetFramework =>
+#if NETSTANDARD2_0
+ RuntimeInformation.FrameworkDescription.StartsWith(".NET
Framework", StringComparison.OrdinalIgnoreCase);
+#elif NET40_OR_GREATER
+ true;
+#else
+ false;
+#endif
+
+ [Conditional("FEATURE_ENCODINGPROVIDERS")]
+ public static void EnsureInitialized()
+ {
+ // Only allow a single thread to call this
+ if (0 != Interlocked.CompareExchange(ref initialized, 1, 0))
return;
+
+#if FEATURE_ENCODINGPROVIDERS
+ if (!IsNetFramework)
+ {
+ Initialize();
+ }
+#endif
+ }
+
+#if FEATURE_ENCODINGPROVIDERS
+ // NOTE: CodePagesEncodingProvider.Instance loads early, so we need
this in a separate method to ensure
+ // that it isn't executed until after we know which runtime we are on.
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static void Initialize()
+ {
+ // Support for EUC-JP encoding. See:
https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
+ Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
+ }
+#endif
+ }
+}
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs
b/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs
index 936999db2..b6c6c1bd5 100644
--- a/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs
+++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/ConnectionCostsWriter.cs
@@ -2,6 +2,7 @@
using Lucene.Net.Codecs;
using Lucene.Net.Diagnostics;
using Lucene.Net.Store;
+using Lucene.Net.Support;
using System.Diagnostics;
using System.IO;
@@ -37,7 +38,7 @@ namespace Lucene.Net.Analysis.Ja.Util
this.forwardSize = forwardSize;
this.backwardSize = backwardSize;
//this.costs = new short[backwardSize][forwardSize];
- this.costs =
Support.RectangularArrays.ReturnRectangularArray<short>(backwardSize,
forwardSize);
+ this.costs =
RectangularArrays.ReturnRectangularArray<short>(backwardSize, forwardSize);
}
public void Add(int forwardId, int backwardId, int cost)
diff --git a/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs
b/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs
index c6f222290..19a5abbb6 100644
--- a/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs
+++ b/src/Lucene.Net.Analysis.Kuromoji/Tools/DictionaryBuilder.cs
@@ -1,4 +1,5 @@
-using System;
+using Lucene.Net.Util;
+using System;
using Console = Lucene.Net.Util.SystemConsole;
namespace Lucene.Net.Analysis.Ja.Util
@@ -35,11 +36,8 @@ namespace Lucene.Net.Analysis.Ja.Util
static DictionaryBuilder()
{
-#if FEATURE_ENCODINGPROVIDERS
- // Support for EUC-JP encoding. See:
https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
- var encodingProvider =
System.Text.CodePagesEncodingProvider.Instance;
- System.Text.Encoding.RegisterProvider(encodingProvider);
-#endif
+ // LUCENENET: Support for EUC-JP encoding. See:
https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
+ EncodingProviderInitializer.EnsureInitialized();
}
public static void Build(DictionaryFormat format,
diff --git a/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs
b/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs
index 9ae93b526..70c85a440 100644
--- a/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs
+++ b/src/Lucene.Net.Analysis.SmartCn/AnalyzerProfile.cs
@@ -58,11 +58,8 @@ namespace Lucene.Net.Analysis.Cn.Smart
// from ever being loaded).
private static void Init()
{
-#if FEATURE_ENCODINGPROVIDERS
- // Support for GB2312 encoding. See:
https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
- var encodingProvider =
System.Text.CodePagesEncodingProvider.Instance;
- System.Text.Encoding.RegisterProvider(encodingProvider);
-#endif
+ // LUCENENET: Support for GB2312 encoding. See:
https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
+ EncodingProviderInitializer.EnsureInitialized();
string dirName = "smartcn-data";
//string propName = "analysis.properties";
diff --git
a/src/Lucene.Net.Analysis.SmartCn/Support/Util/EncodingProviderInitializer.cs
b/src/Lucene.Net.Analysis.SmartCn/Support/Util/EncodingProviderInitializer.cs
new file mode 100644
index 000000000..827ad2d5f
--- /dev/null
+++
b/src/Lucene.Net.Analysis.SmartCn/Support/Util/EncodingProviderInitializer.cs
@@ -0,0 +1,52 @@
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Text;
+using System.Threading;
+
+namespace Lucene.Net.Util
+{
+ /// <summary>
+ /// Loads the <see cref="System.Text.EncodingProvider"/> for the current
runtime for support of
+ /// GB2312 encoding.
+ /// </summary>
+ internal static class EncodingProviderInitializer
+ {
+ private static int initialized;
+
+ private static bool IsNetFramework =>
+#if NETSTANDARD2_0
+ RuntimeInformation.FrameworkDescription.StartsWith(".NET
Framework", StringComparison.OrdinalIgnoreCase);
+#elif NET40_OR_GREATER
+ true;
+#else
+ false;
+#endif
+
+ [Conditional("FEATURE_ENCODINGPROVIDERS")]
+ public static void EnsureInitialized()
+ {
+ // Only allow a single thread to call this
+ if (0 != Interlocked.CompareExchange(ref initialized, 1, 0))
return;
+
+#if FEATURE_ENCODINGPROVIDERS
+ if (!IsNetFramework)
+ {
+ Initialize();
+ }
+#endif
+ }
+
+#if FEATURE_ENCODINGPROVIDERS
+ // NOTE: CodePagesEncodingProvider.Instance loads early, so we need
this in a separate method to ensure
+ // that it isn't executed until after we know which runtime we are on.
+ [MethodImpl(MethodImplOptions.NoInlining)]
+ private static void Initialize()
+ {
+ // Support for GB2312 encoding. See:
https://docs.microsoft.com/en-us/dotnet/api/system.text.codepagesencodingprovider?view=netcore-2.0
+ Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
+ }
+#endif
+ }
+}
diff --git a/src/Lucene.Net.Tests.Analysis.Common/Startup.cs
b/src/Lucene.Net.Tests.Analysis.Common/Startup.cs
index 07033b1b0..c0400a351 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Startup.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Startup.cs
@@ -27,7 +27,12 @@ public class Startup : LuceneTestFrameworkInitializer
// require it to be added as well when using Hunspell, but there is no
reason to load
// the code pages by default in Lucene.Net.Analysis.Common. It should
be added by consumers
// or Hunspell that require it.
+ //
+ // Note this is in the test project, which never uses netstandard2.0.
If we were using
+ // netstandard2.0, we would need an extra check to determine if we
are on .NET Framework,
+ // which doesn't support encoding providers. See
EncodingProviderInitializer in the
+ // Lucene.Net.Analysis.Kuromoji project.
System.Text.Encoding.RegisterProvider(System.Text.CodePagesEncodingProvider.Instance);
#endif
}
-}
\ No newline at end of file
+}