This is an automated email from the ASF dual-hosted git repository.
paulirwin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
The following commit(s) were added to refs/heads/master by this push:
new 12b8941fe BREAKING: Use BOM-less UTF-8 encoding for writes, #1027
(#1075)
12b8941fe is described below
commit 12b8941fe32539070f4b4cc9b8e0780b6f307e52
Author: Paul Irwin <[email protected]>
AuthorDate: Sun Jan 5 20:37:54 2025 -0700
BREAKING: Use BOM-less UTF-8 encoding for writes, #1027 (#1075)
* SWEEP: Use BOM-less UTF-8 encoding for writes, #1027
* Remove OfflineSorter.DEFAULT_ENCODING field and replace with
IOUtils.CHARSET_UTF_8
* Rename IOUtils.CHARSET_UTF_8 to ENCODING_UTF_8_NO_BOM
---
.../ByTask/Tasks/WriteEnwikiLineDocTask.cs | 7 +++--
.../ByTask/Tasks/WriteLineDocTask.cs | 9 +++---
.../Quality/Trec/QueryDriver.cs | 2 +-
src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs | 5 ++--
src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs | 3 +-
src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs | 22 +++++++--------
src/Lucene.Net.TestFramework/Util/TestUtil.cs | 4 +--
.../Analysis/Hunspell/Test64kAffixes.cs | 33 +++++++++++-----------
.../Analysis/Util/TestFilesystemResourceLoader.cs | 5 ++--
.../ByTask/Feeds/DocMakerTest.cs | 3 +-
.../ByTask/Feeds/LineDocSourceTest.cs | 13 +++++----
.../ByTask/Utils/StreamUtilsTest.cs | 19 +++++++------
src/Lucene.Net.Tests.Demo/TestDemo.cs | 6 ++--
.../Taxonomy/TestTaxonomyFacetCounts.cs | 2 +-
src/Lucene.Net.Tests/Index/TestCheckIndex.cs | 3 +-
.../Index/TestDocInverterPerFieldErrorInfo.cs | 5 ++--
.../Index/TestIndexWriterDelete.cs | 4 +--
src/Lucene.Net.Tests/Index/TestPayloads.cs | 6 ++--
src/Lucene.Net.Tests/Search/Spans/TestBasics.cs | 22 +++++++--------
src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs | 3 +-
src/Lucene.Net.Tests/Util/TestOfflineSorter.cs | 4 +--
.../Compressing/CompressingStoredFieldsReader.cs | 2 +-
.../Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs | 2 +-
.../Codecs/Lucene40/Lucene40StoredFieldsReader.cs | 2 +-
src/Lucene.Net/Index/Term.cs | 4 ++-
src/Lucene.Net/Support/StandardCharsets.cs | 6 +++-
src/Lucene.Net/Util/IOUtils.cs | 11 ++++++--
src/Lucene.Net/Util/OfflineSorter.cs | 28 ++++++++----------
28 files changed, 126 insertions(+), 109 deletions(-)
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs
b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs
index 7e61359b0..a12280f21 100644
--- a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs
@@ -2,6 +2,7 @@
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Documents;
using Lucene.Net.Index;
+using Lucene.Net.Support;
using System;
using System.IO;
using System.Text;
@@ -26,9 +27,9 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
*/
/// <summary>
- /// A <see cref="WriteLineDocTask"/> which for Wikipedia input, will write
category pages
+ /// A <see cref="WriteLineDocTask"/> which for Wikipedia input, will write
category pages
/// to another file, while remaining pages will be written to the original
file.
- /// The categories file is derived from the original file, by adding a
prefix "categories-".
+ /// The categories file is derived from the original file, by adding a
prefix "categories-".
/// </summary>
public class WriteEnwikiLineDocTask : WriteLineDocTask
{
@@ -38,7 +39,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
: base(runData)
{
Stream @out = StreamUtils.GetOutputStream(CategoriesLineFile(new
FileInfo(m_fname)));
- categoryLineFileOut = new StreamWriter(@out, Encoding.UTF8);
+ categoryLineFileOut = new StreamWriter(@out,
StandardCharsets.UTF_8);
WriteHeader(categoryLineFileOut);
}
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs
b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs
index d724cfc51..ddb872797 100644
--- a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs
@@ -3,6 +3,7 @@ using Lucene.Net.Benchmarks.ByTask.Feeds;
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Documents;
using Lucene.Net.Index;
+using Lucene.Net.Support;
using Lucene.Net.Support.Threading;
using Lucene.Net.Util;
using System;
@@ -49,8 +50,8 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
/// <item><term>line.file.out</term><description>the name of the file
to write the output to. That parameter is mandatory. <b>NOTE:</b> the file is
re-created.</description></item>
/// <item><term>line.fields</term><description>which fields should be
written in each line. (optional, default: <see
cref="DEFAULT_FIELDS"/>).</description></item>
/// <item><term>sufficient.fields</term><description>
- /// list of field names, separated by comma, which,
- /// if all of them are missing, the document will be skipped. For
example, to require
+ /// list of field names, separated by comma, which,
+ /// if all of them are missing, the document will be skipped. For
example, to require
/// that at least one of f1,f2 is not empty, specify: "f1,f2" in
this field. To specify
/// that no field is required, i.e. that even empty docs should be
emitted, specify <b>","</b>
/// (optional, default: <see cref="DEFAULT_SUFFICIENT_FIELDS"/>).
@@ -112,10 +113,10 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
throw new ArgumentException("line.file.out must be set");
}
Stream @out = StreamUtils.GetOutputStream(new FileInfo(m_fname));
- m_lineFileOut = new StreamWriter(@out, Encoding.UTF8);
+ m_lineFileOut = new StreamWriter(@out, StandardCharsets.UTF_8);
docMaker = runData.DocMaker;
- // init fields
+ // init fields
string f2r = config.Get("line.fields", null);
if (f2r is null)
{
diff --git a/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
index b75eb55b5..d08dab75a 100644
--- a/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
+++ b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
@@ -71,7 +71,7 @@ namespace Lucene.Net.Benchmarks.Quality.Trec
FileInfo topicsFile = new FileInfo(args[0]);
FileInfo qrelsFile = new FileInfo(args[1]);
- SubmissionReport submitLog = new SubmissionReport(new
StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write),
Encoding.UTF8 /* huh, no nio.Charset ctor? */), "lucene");
+ SubmissionReport submitLog = new SubmissionReport(new
StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write),
IOUtils.ENCODING_UTF_8_NO_BOM /* huh, no nio.Charset ctor? */), "lucene");
using Store.FSDirectory dir = Store.FSDirectory.Open(new
DirectoryInfo(args[3]));
using IndexReader reader = DirectoryReader.Open(dir);
string fieldSpec = args.Length == 5 ? args[4] : "T"; // default to
Title-only if not specified.
diff --git a/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs
b/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs
index be1f79aa2..8d0440326 100644
--- a/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs
+++ b/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs
@@ -1,4 +1,5 @@
-using System;
+using Lucene.Net.Support;
+using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
@@ -118,7 +119,7 @@ namespace Lucene.Net.Benchmarks.Utils
string outFile =
System.IO.Path.Combine(outputDir.FullName, sgmFile.Name + "-"
+ (docNumber++) + ".txt");
// System.out.println("Writing " + outFile);
- StreamWriter writer = new StreamWriter(new
FileStream(outFile, FileMode.Create, FileAccess.Write), Encoding.UTF8);
+ StreamWriter writer = new StreamWriter(new
FileStream(outFile, FileMode.Create, FileAccess.Write), StandardCharsets.UTF_8);
writer.Write(@out);
writer.Dispose();
outBuffer.Length = 0;
diff --git a/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs
b/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs
index 5504248a3..7a50f3fb5 100644
--- a/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs
+++ b/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs
@@ -1,6 +1,7 @@
using Lucene.Net.Benchmarks.ByTask.Feeds;
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Documents;
+using Lucene.Net.Support;
using System;
using System.Collections.Generic;
using System.Globalization;
@@ -88,7 +89,7 @@ namespace Lucene.Net.Benchmarks.Utils
try
{
- using TextWriter writer = new StreamWriter(new
FileStream(f.FullName, FileMode.Create, FileAccess.Write), Encoding.UTF8);
+ using TextWriter writer = new StreamWriter(new
FileStream(f.FullName, FileMode.Create, FileAccess.Write),
StandardCharsets.UTF_8);
writer.Write(contents.ToString());
}
catch (Exception ioe) when (ioe.IsIOException())
diff --git a/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
index e173c6f72..cc4917321 100644
--- a/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
+++ b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
@@ -318,16 +318,16 @@ namespace Lucene.Net.Util.Fst
bool willRewrite = random.NextBoolean();
- Builder<T> builder = new Builder<T>(inputMode == 0 ?
FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
- prune1, prune2,
- prune1 == 0 && prune2 == 0,
- allowRandomSuffixSharing ?
random.NextBoolean() : true,
- allowRandomSuffixSharing ?
TestUtil.NextInt32(random, 1, 10) : int.MaxValue,
- outputs,
- null,
- willRewrite,
- PackedInt32s.DEFAULT,
- true,
+ Builder<T> builder = new Builder<T>(inputMode == 0 ?
FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
+ prune1, prune2,
+ prune1 == 0 && prune2 == 0,
+ allowRandomSuffixSharing ?
random.NextBoolean() : true,
+ allowRandomSuffixSharing ?
TestUtil.NextInt32(random, 1, 10) : int.MaxValue,
+ outputs,
+ null,
+ willRewrite,
+ PackedInt32s.DEFAULT,
+ true,
15);
if (LuceneTestCase.Verbose)
{
@@ -386,7 +386,7 @@ namespace Lucene.Net.Util.Fst
if (LuceneTestCase.Verbose && pairs.Count <= 20 && fst != null)
{
- using (TextWriter w = new StreamWriter(new
FileStream("out.dot", FileMode.OpenOrCreate), Encoding.UTF8))
+ using (TextWriter w = new StreamWriter(new
FileStream("out.dot", FileMode.OpenOrCreate), StandardCharsets.UTF_8))
{
Util.ToDot(fst, w, false, false);
}
diff --git a/src/Lucene.Net.TestFramework/Util/TestUtil.cs
b/src/Lucene.Net.TestFramework/Util/TestUtil.cs
index d270d069e..ad25bf418 100644
--- a/src/Lucene.Net.TestFramework/Util/TestUtil.cs
+++ b/src/Lucene.Net.TestFramework/Util/TestUtil.cs
@@ -167,7 +167,7 @@ namespace Lucene.Net.Util
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.CrossCheckTermVectors = crossCheckTermVectors;
- checker.InfoStream = new StreamWriter(bos, Encoding.UTF8);
+ checker.InfoStream = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM);
CheckIndex.Status indexStatus = checker.DoCheckIndex(null);
if (indexStatus is null || indexStatus.Clean == false)
{
@@ -203,7 +203,7 @@ namespace Lucene.Net.Util
{
// LUCENENET: dispose the StreamWriter and ByteArrayOutputStream
when done
using ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
- using StreamWriter infoStream = new StreamWriter(bos,
Encoding.UTF8, leaveOpen: true, bufferSize: 1024);
+ using StreamWriter infoStream = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: true, bufferSize: 1024);
reader.CheckIntegrity();
CheckIndex.Status.FieldNormStatus fieldNormStatus =
Index.CheckIndex.TestFieldNorms(reader, infoStream);
diff --git
a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs
b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs
index e210ee9fd..ec4824a7c 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.10.4
using J2N;
+using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System.Collections.Generic;
@@ -35,25 +36,25 @@ namespace Lucene.Net.Analysis.Hunspell
FileInfo affix = new
FileInfo(System.IO.Path.Combine(tempDir.FullName, "64kaffixes.aff"));
FileInfo dict = new
FileInfo(System.IO.Path.Combine(tempDir.FullName, "64kaffixes.dic"));
- using var affixWriter = new StreamWriter(
- new FileStream(affix.FullName, FileMode.OpenOrCreate),
Encoding.UTF8);
-
- // 65k affixes with flag 1, then an affix with flag 2
- affixWriter.Write("SET UTF-8\nFLAG num\nSFX 1 Y 65536\n");
- for (int i = 0; i < 65536; i++)
+ using (var affixWriter = new StreamWriter(
+ new FileStream(affix.FullName, FileMode.OpenOrCreate),
StandardCharsets.UTF_8))
{
- affixWriter.Write("SFX 1 0 " + i.ToHexString() + " .\n");
- }
- affixWriter.Write("SFX 2 Y 1\nSFX 2 0 s\n");
- affixWriter.Dispose();
-
- using var dictWriter = new StreamWriter(
- new FileStream(dict.FullName, FileMode.OpenOrCreate),
Encoding.UTF8);
+ // 65k affixes with flag 1, then an affix with flag 2
+ affixWriter.Write("SET UTF-8\nFLAG num\nSFX 1 Y 65536\n");
+ for (int i = 0; i < 65536; i++)
+ {
+ affixWriter.Write("SFX 1 0 " + i.ToHexString() + " .\n");
+ }
+ affixWriter.Write("SFX 2 Y 1\nSFX 2 0 s\n");
+ } // affixWriter.Dispose();
- // drink signed with affix 2 (takes -s)
- dictWriter.Write("1\ndrink/2\n");
- dictWriter.Dispose();
+ using (var dictWriter = new StreamWriter(
+ new FileStream(dict.FullName, FileMode.OpenOrCreate),
StandardCharsets.UTF_8))
+ {
+ // drink signed with affix 2 (takes -s)
+ dictWriter.Write("1\ndrink/2\n");
+ } // dictWriter.Dispose();
using Stream affStream = new FileStream(affix.FullName,
FileMode.OpenOrCreate, FileAccess.ReadWrite);
using Stream dictStream = new FileStream(dict.FullName,
FileMode.OpenOrCreate, FileAccess.ReadWrite);
diff --git
a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs
b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs
index 06db2c28e..d8166892c 100644
---
a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs
+++
b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using J2N;
+using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
@@ -78,7 +79,7 @@ namespace Lucene.Net.Analysis.Util
DirectoryInfo @base = CreateTempDir("fsResourceLoaderBase");
try
{
- TextWriter os = new StreamWriter(new
FileStream(System.IO.Path.Combine(@base.FullName, "template.txt"),
FileMode.Create, FileAccess.Write), Encoding.UTF8);
+ TextWriter os = new StreamWriter(new
FileStream(System.IO.Path.Combine(@base.FullName, "template.txt"),
FileMode.Create, FileAccess.Write), StandardCharsets.UTF_8);
try
{
os.Write("foobar\n");
@@ -120,4 +121,4 @@ namespace Lucene.Net.Analysis.Util
assertEquals("foobar",
WordlistLoader.GetLines(rl.OpenResource("template.txt"),
Encoding.UTF8).First());
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs
b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs
index d8acb4f66..acd70715b 100644
--- a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs
+++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs
@@ -5,6 +5,7 @@ using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Support;
+using Lucene.Net.Util;
using NUnit.Framework;
using System.Collections.Generic;
using System.IO;
@@ -170,7 +171,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
// DocMaker did not close its ContentSource if resetInputs was
called twice,
// leading to a file handle leak.
FileInfo f = new FileInfo(Path.Combine(getWorkDir().FullName,
"docMakerLeak.txt"));
- TextWriter ps = new StreamWriter(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write), Encoding.UTF8);
+ TextWriter ps = new StreamWriter(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write), IOUtils.ENCODING_UTF_8_NO_BOM);
ps.WriteLine("one title\t" + (J2N.Time.NanoTime() /
J2N.Time.MillisecondsPerNanosecond) + "\tsome content"); // LUCENENET: Use
NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable
results
ps.Dispose();
diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs
b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs
index c9b0cad90..ad909a49c 100644
--- a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs
+++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs
@@ -4,6 +4,7 @@ using Lucene.Net.Benchmarks.ByTask.Tasks;
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Index;
using Lucene.Net.Search;
+using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
@@ -41,7 +42,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
{
Stream @out = new FileStream(file.FullName, FileMode.Create,
FileAccess.Write);
@out = new BZip2OutputStream(@out); //
csFactory.createCompressorOutputStream("bzip2", @out);
- TextWriter writer = new StreamWriter(@out, Encoding.UTF8);
+ TextWriter writer = new StreamWriter(@out, StandardCharsets.UTF_8);
writeDocsToFile(writer, addHeader, null);
writer.Dispose();
}
@@ -59,7 +60,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
writer.Write(DocMaker.BODY_FIELD);
if (otherFields != null)
{
- // additional field names in the header
+ // additional field names in the header
foreach (Object fn in otherFields.Keys)
{
writer.Write(WriteLineDocTask.SEP);
@@ -72,7 +73,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
doc.append("title").append(WriteLineDocTask.SEP).append("date").append(WriteLineDocTask.SEP).append(DocMaker.BODY_FIELD);
if (otherFields != null)
{
- // additional field values in the doc line
+ // additional field values in the doc line
foreach (Object fv in otherFields.Values)
{
doc.append(WriteLineDocTask.SEP).append(fv.toString());
@@ -85,7 +86,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
private void createRegularLineFile(FileInfo file, bool addHeader)
{
Stream @out = new FileStream(file.FullName, FileMode.Create,
FileAccess.Write);
- TextWriter writer = new StreamWriter(@out, Encoding.UTF8);
+ TextWriter writer = new StreamWriter(@out, StandardCharsets.UTF_8);
writeDocsToFile(writer, addHeader, null);
writer.Dispose();
}
@@ -93,7 +94,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
private void createRegularLineFileWithMoreFields(FileInfo file, params
String[] extraFields)
{
Stream @out = new FileStream(file.FullName, FileMode.Create,
FileAccess.Write);
- TextWriter writer = new StreamWriter(@out, Encoding.UTF8);
+ TextWriter writer = new StreamWriter(@out, StandardCharsets.UTF_8);
Dictionary<string, string> p = new Dictionary<string, string>();
foreach (String f in extraFields)
{
@@ -231,7 +232,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
for (int i = 0; i < testCases.Length; i++)
{
FileInfo file = new
FileInfo(Path.Combine(getWorkDir().FullName, "one-line"));
- TextWriter writer = new StreamWriter(new
FileStream(file.FullName, FileMode.Create, FileAccess.Write), Encoding.UTF8);
+ TextWriter writer = new StreamWriter(new
FileStream(file.FullName, FileMode.Create, FileAccess.Write),
StandardCharsets.UTF_8);
writer.Write(testCases[i]);
writer.WriteLine();
writer.Dispose();
diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs
b/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs
index eb2aaec78..d8e175f74 100644
--- a/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs
+++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs
@@ -1,4 +1,5 @@
using ICSharpCode.SharpZipLib.BZip2;
+using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
@@ -27,7 +28,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
public class StreamUtilsTest : BenchmarkTestCase
{
- private static readonly String TEXT = "Some-Text...";
+ private static readonly string TEXT = "Some-Text...";
private DirectoryInfo testDir;
[Test]
@@ -82,15 +83,15 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
assertReadText(autoOutFile("TEXT"));
}
- private FileInfo rawTextFile(String ext)
+ private FileInfo rawTextFile(string ext)
{
FileInfo f = new FileInfo(Path.Combine(testDir.FullName,
"testfile." + ext));
- using (TextWriter w = new StreamWriter(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write), Encoding.UTF8))
+ using (TextWriter w = new StreamWriter(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write), StandardCharsets.UTF_8))
w.WriteLine(TEXT);
return f;
}
- private FileInfo rawGzipFile(String ext)
+ private FileInfo rawGzipFile(string ext)
{
FileInfo f = new FileInfo(Path.Combine(testDir.FullName,
"testfile." + ext));
using (Stream os = new GZipStream(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write), CompressionMode.Compress)) //new
CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.GZIP,
new FileOutputStream(f));
@@ -98,7 +99,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
return f;
}
- private FileInfo rawBzip2File(String ext)
+ private FileInfo rawBzip2File(string ext)
{
FileInfo f = new FileInfo(Path.Combine(testDir.FullName,
"testfile." + ext));
Stream os = new BZip2OutputStream(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write)); // new
CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.BZIP2,
new FileOutputStream(f));
@@ -106,7 +107,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
return f;
}
- private FileInfo autoOutFile(String ext)
+ private FileInfo autoOutFile(string ext)
{
FileInfo f = new FileInfo(Path.Combine(testDir.FullName,
"testfile." + ext));
Stream os = StreamUtils.GetOutputStream(f);
@@ -116,7 +117,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
private void writeText(Stream os)
{
- TextWriter w = new StreamWriter(os, Encoding.UTF8);
+ TextWriter w = new StreamWriter(os, StandardCharsets.UTF_8);
w.WriteLine(TEXT);
w.Dispose();
}
@@ -124,8 +125,8 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
private void assertReadText(FileInfo f)
{
Stream ir = StreamUtils.GetInputStream(f);
- TextReader r = new StreamReader(ir, Encoding.UTF8);
- String line = r.ReadLine();
+ TextReader r = new StreamReader(ir, StandardCharsets.UTF_8);
+ string line = r.ReadLine();
assertEquals("Wrong text found in " + f.Name, TEXT, line);
r.Dispose();
}
diff --git a/src/Lucene.Net.Tests.Demo/TestDemo.cs
b/src/Lucene.Net.Tests.Demo/TestDemo.cs
index 356405bc0..16b2379a5 100644
--- a/src/Lucene.Net.Tests.Demo/TestDemo.cs
+++ b/src/Lucene.Net.Tests.Demo/TestDemo.cs
@@ -37,7 +37,7 @@ namespace Lucene.Net.Demo
var fakeSystemOut = new StreamWriter(bytes,
Encoding.GetEncoding(0));
Console.SetOut(fakeSystemOut);
// LUCENENET specific: changed the arguments to act more like
the dotnet.exe commands.
- // * only optional arguments start with -
+ // * only optional arguments start with -
// * options have a long form that starts with --
// * required arguments must be supplied without - or -- and
in a specific order
// Since the demo is meant to be seen by end users, these
changes were necessary to make
@@ -80,13 +80,13 @@ namespace Lucene.Net.Demo
DirectoryInfo indexDir = CreateTempDir("DemoTest");
// LUCENENET specific: changed the arguments to act more like the
dotnet.exe commands.
- // * only optional arguments start with -
+ // * only optional arguments start with -
// * options have a long form that starts with --
// * required arguments must be supplied without - or -- and in a
specific order
// Since the demo is meant to be seen by end users, these changes
were necessary to make
// it consistent with the lucene-cli utility.
// NOTE: There is no -create in lucene, but it has the same effect
as if --update were left out
- IndexFiles.Main(new string[] { indexDir.FullName,
filesDir.FullName });
+ IndexFiles.Main(new string[] { indexDir.FullName,
filesDir.FullName });
//IndexFiles.Main(new string[] { "-create", "-docs",
filesDir.FullName, "-index", indexDir.FullName });
TestOneSearch(indexDir, "apache", 3);
TestOneSearch(indexDir, "patent", 8);
diff --git a/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
b/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
index 206ef6f1b..02f8764f7 100644
--- a/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
+++ b/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
@@ -134,7 +134,7 @@ namespace Lucene.Net.Facet.Taxonomy
string result;
using (ByteArrayOutputStream bos = new ByteArrayOutputStream())
{
- using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8,
2048, true) { AutoFlush = true })
+ using (StreamWriter w = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM, 2048, true) { AutoFlush = true })
{
PrintTaxonomyStats.PrintStats(taxoReader, w, true);
}
diff --git a/src/Lucene.Net.Tests/Index/TestCheckIndex.cs
b/src/Lucene.Net.Tests/Index/TestCheckIndex.cs
index bb1c3a7ad..443ea738c 100644
--- a/src/Lucene.Net.Tests/Index/TestCheckIndex.cs
+++ b/src/Lucene.Net.Tests/Index/TestCheckIndex.cs
@@ -1,6 +1,7 @@
using Lucene.Net.Documents;
using Lucene.Net.Index.Extensions;
using Lucene.Net.Support.IO;
+using Lucene.Net.Util;
using NUnit.Framework;
using System.Collections.Generic;
using System.IO;
@@ -63,7 +64,7 @@ namespace Lucene.Net.Index
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
- checker.InfoStream = new StreamWriter(bos, Encoding.UTF8);
+ checker.InfoStream = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM);
if (Verbose)
{
checker.InfoStream = Console.Out;
diff --git a/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs
b/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs
index bca4cfb3a..130b175f9 100644
--- a/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs
+++ b/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs
@@ -1,6 +1,7 @@
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Support.IO;
+using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.IO;
@@ -90,7 +91,7 @@ namespace Lucene.Net.Index
IndexWriter writer;
IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT,
new ThrowingAnalyzer());
ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
- StreamWriter infoPrintStream = new StreamWriter(infoBytes,
Encoding.UTF8);
+ StreamWriter infoPrintStream = new StreamWriter(infoBytes,
IOUtils.ENCODING_UTF_8_NO_BOM);
TextWriterInfoStream printStreamInfoStream = new
TextWriterInfoStream(infoPrintStream);
c.SetInfoStream(printStreamInfoStream);
writer = new IndexWriter(dir, c);
@@ -119,7 +120,7 @@ namespace Lucene.Net.Index
IndexWriter writer;
IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT,
new ThrowingAnalyzer());
ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
- StreamWriter infoPrintStream = new StreamWriter(infoBytes,
Encoding.UTF8);
+ StreamWriter infoPrintStream = new StreamWriter(infoBytes,
IOUtils.ENCODING_UTF_8_NO_BOM);
TextWriterInfoStream printStreamInfoStream = new
TextWriterInfoStream(infoPrintStream);
c.SetInfoStream(printStreamInfoStream);
writer = new IndexWriter(dir, c);
diff --git a/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs
b/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs
index a64e9af0d..525750e98 100644
--- a/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs
+++ b/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs
@@ -1400,7 +1400,7 @@ namespace Lucene.Net.Index
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
//MemoryStream bos = new MemoryStream(1024);
CheckIndex checker = new CheckIndex(dir);
- checker.InfoStream = new StreamWriter(bos, Encoding.UTF8);
+ checker.InfoStream = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM);
CheckIndex.Status indexStatus = checker.DoCheckIndex(null);
Assert.IsTrue(indexStatus.Clean);
checker.FlushInfoStream();
@@ -1413,7 +1413,7 @@ namespace Lucene.Net.Index
w.Dispose();
bos = new ByteArrayOutputStream(1024);
- checker.InfoStream = new StreamWriter(bos, Encoding.UTF8);
+ checker.InfoStream = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM);
indexStatus = checker.DoCheckIndex(null);
Assert.IsTrue(indexStatus.Clean);
checker.FlushInfoStream();
diff --git a/src/Lucene.Net.Tests/Index/TestPayloads.cs
b/src/Lucene.Net.Tests/Index/TestPayloads.cs
index e35cc6aa5..1affe6ba5 100644
--- a/src/Lucene.Net.Tests/Index/TestPayloads.cs
+++ b/src/Lucene.Net.Tests/Index/TestPayloads.cs
@@ -84,7 +84,7 @@ namespace Lucene.Net.Index
// enabled in only some documents
d.Add(NewTextField("f3", "this field has payloads in some docs",
Field.Store.NO));
// only add payload data for field f2
- analyzer.SetPayloadData("f2",
"somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
+ analyzer.SetPayloadData("f2",
"somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 1);
writer.AddDocument(d);
// flush
writer.Dispose();
@@ -106,8 +106,8 @@ namespace Lucene.Net.Index
d.Add(NewTextField("f2", "this field has payloads in all docs",
Field.Store.NO));
d.Add(NewTextField("f3", "this field has payloads in some docs",
Field.Store.NO));
// add payload data for field f2 and f3
- analyzer.SetPayloadData("f2",
"somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
- analyzer.SetPayloadData("f3",
"somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3);
+ analyzer.SetPayloadData("f2",
"somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 1);
+ analyzer.SetPayloadData("f3",
"somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 3);
writer.AddDocument(d);
// force merge
diff --git a/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs
b/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs
index d54678ae9..0e08ed67f 100644
--- a/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs
+++ b/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs
@@ -77,7 +77,7 @@ namespace Lucene.Net.Search.Spans
if (m_input.IncrementToken())
{
#pragma warning disable 612, 618
- payloadAttr.Payload = new BytesRef(("pos: " +
pos).GetBytes(IOUtils.CHARSET_UTF_8));
+ payloadAttr.Payload = new BytesRef(("pos: " +
pos).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
pos++;
return true;
@@ -533,7 +533,7 @@ namespace Lucene.Net.Search.Spans
{
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
#pragma warning disable 612, 618
- BytesRef pay = new BytesRef(("pos: " +
5).GetBytes(IOUtils.CHARSET_UTF_8));
+ BytesRef pay = new BytesRef(("pos: " +
5).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
SpanQuery query = new SpanPayloadCheckQuery(term1, new
JCG.List<byte[]>() { pay.Bytes });
CheckHits(query, new int[] { 1125, 1135, 1145, 1155, 1165, 1175,
1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345,
1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495,
1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665,
1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835,
1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985,
1995 });
@@ -549,8 +549,8 @@ namespace Lucene.Net.Search.Spans
clauses[1] = term2;
snq = new SpanNearQuery(clauses, 0, true);
#pragma warning disable 612, 618
- pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
- pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
+ pay = new BytesRef(("pos: " +
0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ pay2 = new BytesRef(("pos: " +
1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
list = new JCG.List<byte[]>();
list.Add(pay.Bytes);
@@ -563,9 +563,9 @@ namespace Lucene.Net.Search.Spans
clauses[2] = new SpanTermQuery(new Term("field", "five"));
snq = new SpanNearQuery(clauses, 0, true);
#pragma warning disable 612, 618
- pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
- pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
- BytesRef pay3 = new BytesRef(("pos: " +
2).GetBytes(IOUtils.CHARSET_UTF_8));
+ pay = new BytesRef(("pos: " +
0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ pay2 = new BytesRef(("pos: " +
1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ BytesRef pay3 = new BytesRef(("pos: " +
2).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
list = new JCG.List<byte[]>();
list.Add(pay.Bytes);
@@ -597,10 +597,10 @@ namespace Lucene.Net.Search.Spans
var payloads = new JCG.List<byte[]>();
#pragma warning disable 612, 618
- BytesRef pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
- BytesRef pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
- BytesRef pay3 = new BytesRef(("pos: " + 3).GetBytes(IOUtils.CHARSET_UTF_8));
- BytesRef pay4 = new BytesRef(("pos: " + 4).GetBytes(IOUtils.CHARSET_UTF_8));
+ BytesRef pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ BytesRef pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ BytesRef pay3 = new BytesRef(("pos: " + 3).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ BytesRef pay4 = new BytesRef(("pos: " + 4).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
payloads.Add(pay.Bytes);
payloads.Add(pay2.Bytes);
diff --git a/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs
b/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs
index f21e68457..e9de419c3 100644
--- a/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs
+++ b/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs
@@ -2,6 +2,7 @@
using J2N.Threading.Atomic;
using Lucene.Net.Diagnostics;
using Lucene.Net.Index.Extensions;
+using Lucene.Net.Support;
using Lucene.Net.Util.Automaton;
using NUnit.Framework;
using RandomizedTesting.Generators;
@@ -589,7 +590,7 @@ namespace Lucene.Net.Util.Fst
Console.WriteLine(ord + " terms; " + fst.NodeCount + "
nodes; " + fst.ArcCount + " arcs; " + fst.ArcWithOutputCount + " arcs w/
output; tot size " + fst.GetSizeInBytes());
if (fst.NodeCount < 100)
{
- TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.Create), Encoding.UTF8);
+ TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.Create), StandardCharsets.UTF_8);
Util.ToDot(fst, w, false, false);
w.Dispose();
Console.WriteLine("Wrote FST to out.dot");
diff --git a/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
b/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
index 3f9ae8929..19a3cc20d 100644
--- a/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
+++ b/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
@@ -86,7 +86,7 @@ namespace Lucene.Net.Util
public virtual void TestSingleLine()
{
#pragma warning disable 612, 618
- CheckSort(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) });
+ CheckSort(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM) });
#pragma warning restore 612, 618
}
@@ -95,7 +95,7 @@ namespace Lucene.Net.Util
public virtual void TestSingleLine_AsStream()
{
#pragma warning disable 612, 618
- CheckSortAsStream(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) });
+ CheckSortAsStream(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM) });
#pragma warning restore 612, 618
}
diff --git a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs
b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs
index 14fd869e2..16b053f4d 100644
--- a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs
+++ b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs
@@ -205,7 +205,7 @@ namespace Lucene.Net.Codecs.Compressing
data = new byte[length];
@in.ReadBytes(data, 0, length);
#pragma warning disable 612, 618
- visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(data));
+ visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(data));
#pragma warning restore 612, 618
break;
diff --git a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs
b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs
index 442a08e71..8615bb283 100644
--- a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs
+++ b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs
@@ -334,7 +334,7 @@ namespace Lucene.Net.Codecs.Lucene3x
}
else
{
- visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(bytes));
+ visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(bytes));
}
}
}
diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs
b/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs
index b6736feb2..d21f35871 100644
--- a/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs
+++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs
@@ -239,7 +239,7 @@ namespace Lucene.Net.Codecs.Lucene40
else
{
#pragma warning disable 612, 618
- visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(bytes));
+ visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(bytes));
#pragma warning restore 612, 618
}
}
diff --git a/src/Lucene.Net/Index/Term.cs b/src/Lucene.Net/Index/Term.cs
index 6e245af2a..38eda37a3 100644
--- a/src/Lucene.Net/Index/Term.cs
+++ b/src/Lucene.Net/Index/Term.cs
@@ -1,4 +1,5 @@
using J2N.Text;
+using Lucene.Net.Support;
using System;
using System.Text;
@@ -90,7 +91,8 @@ namespace Lucene.Net.Index
public static string ToString(BytesRef termText)
{
// the term might not be text, but usually is. so we make a best
effort
- Encoding decoder = new UTF8Encoding(false, true);
+ // LUCENENET TODO: determine if we should use DecoderFallback.ExceptionFallback here
+ Encoding decoder = StandardCharsets.UTF_8;
try
{
return decoder.GetString(termText.Bytes, termText.Offset,
termText.Length);
diff --git a/src/Lucene.Net/Support/StandardCharsets.cs
b/src/Lucene.Net/Support/StandardCharsets.cs
index 781f3b3ae..73ef0f909 100644
--- a/src/Lucene.Net/Support/StandardCharsets.cs
+++ b/src/Lucene.Net/Support/StandardCharsets.cs
@@ -22,5 +22,9 @@ namespace Lucene.Net.Support;
internal static class StandardCharsets
{
- public static readonly Encoding UTF_8 = IOUtils.CHARSET_UTF_8;
+ /// <inheritdoc cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>
+ /// <remarks>
+ /// This is a convenience reference to <see cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>.
+ /// </remarks>
+ public static readonly Encoding UTF_8 = IOUtils.ENCODING_UTF_8_NO_BOM;
}
diff --git a/src/Lucene.Net/Util/IOUtils.cs b/src/Lucene.Net/Util/IOUtils.cs
index dc17cdbdd..c3141b00d 100644
--- a/src/Lucene.Net/Util/IOUtils.cs
+++ b/src/Lucene.Net/Util/IOUtils.cs
@@ -45,16 +45,21 @@ namespace Lucene.Net.Util
/// UTF-8 <see cref="Encoding"/> instance to prevent repeated
/// <see cref="Encoding.UTF8"/> lookups and match Java's behavior
/// with respect to a lack of a byte-order mark (BOM).
+ /// <para />
+ /// It is important to use this encoding over <see cref="Encoding.UTF8"/>
+ /// particularly when writing data, to ensure that the BOM is not written.
+ /// For reading data, either this or <see cref="Encoding.UTF8"/> can be used,
+ /// as both will correctly interpret data with or without a BOM.
/// </summary>
- public static readonly Encoding CHARSET_UTF_8 = new UTF8Encoding(
+ public static readonly Encoding ENCODING_UTF_8_NO_BOM = new UTF8Encoding(
encoderShouldEmitUTF8Identifier: false,
throwOnInvalidBytes: true);
/// <summary>
/// UTF-8 charset string.
- /// <para/>Where possible, use <see cref="Encoding.UTF8"/> instead,
+ /// <para/>Where possible, use <see cref="ENCODING_UTF_8_NO_BOM"/>
instead,
/// as using the <see cref="string"/> constant may slow things down.
</summary>
- /// <seealso cref="Encoding.UTF8"/>
+ /// <seealso cref="ENCODING_UTF_8_NO_BOM"/>
public static readonly string UTF_8 = "UTF-8";
/// <summary>
diff --git a/src/Lucene.Net/Util/OfflineSorter.cs
b/src/Lucene.Net/Util/OfflineSorter.cs
index c1114b18c..968368ca2 100644
--- a/src/Lucene.Net/Util/OfflineSorter.cs
+++ b/src/Lucene.Net/Util/OfflineSorter.cs
@@ -41,12 +41,6 @@ namespace Lucene.Net.Util
/// </summary>
public sealed class OfflineSorter
{
- /// <summary>
- /// The default encoding (UTF-8 without a byte order mark) used by <see cref="ByteSequencesReader"/> and <see cref="ByteSequencesWriter"/>.
- /// This encoding should always be used when calling the constructor overloads that accept <see cref="BinaryReader"/> or <see cref="BinaryWriter"/>.
- /// </summary>
- public static readonly Encoding DEFAULT_ENCODING = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
-
/// <summary>
/// The recommended buffer size to use on <see cref="Sort(FileStream,
FileStream)"/> or when creating a
/// <see cref="ByteSequencesReader"/> and <see
cref="ByteSequencesWriter"/>.
@@ -196,10 +190,10 @@ namespace Lucene.Net.Util
/// </summary>
public override string ToString()
{
- return string.Format(CultureInfo.InvariantCulture,
- "time={0:0.00} sec. total ({1:0.00} reading, {2:0.00}
sorting, {3:0.00} merging), lines={4}, temp files={5}, merges={6}, soft ram
limit={7:0.00} MB",
- TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime /
1000.0d, MergeTime / 1000.0d,
- Lines, TempMergeFiles, MergeRounds,
+ return string.Format(CultureInfo.InvariantCulture,
+ "time={0:0.00} sec. total ({1:0.00} reading, {2:0.00}
sorting, {3:0.00} merging), lines={4}, temp files={5}, merges={6}, soft ram
limit={7:0.00} MB",
+ TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime /
1000.0d, MergeTime / 1000.0d,
+ Lines, TempMergeFiles, MergeRounds,
(double)BufferSize / MB);
}
}
@@ -606,7 +600,7 @@ namespace Lucene.Net.Util
/// Constructs a <see cref="ByteSequencesWriter"/> to the provided
<see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref
name="stream"/> is <c>null</c>.</exception>
public ByteSequencesWriter(FileStream stream)
- : this(new BinaryWriter(stream, DEFAULT_ENCODING, leaveOpen: false))
+ : this(new BinaryWriter(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: false))
{
}
@@ -614,7 +608,7 @@ namespace Lucene.Net.Util
/// Constructs a <see cref="ByteSequencesWriter"/> to the provided
<see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref
name="stream"/> is <c>null</c>.</exception>
public ByteSequencesWriter(FileStream stream, bool leaveOpen)
- : this(new BinaryWriter(stream, DEFAULT_ENCODING, leaveOpen))
+ : this(new BinaryWriter(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen))
{
}
@@ -638,7 +632,7 @@ namespace Lucene.Net.Util
/// <summary>
/// Constructs a <see cref="ByteSequencesWriter"/> to the provided
<see cref="BinaryWriter"/>.
/// <b>NOTE:</b> To match Lucene, pass the <paramref
name="writer"/>'s constructor the
- /// <see cref="DEFAULT_ENCODING"/>, which is UTF-8 without a byte order mark.
+ /// <see cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>, which is UTF-8 without a byte order mark.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref
name="writer"/> is <c>null</c>.</exception>
public ByteSequencesWriter(BinaryWriter writer)
@@ -728,7 +722,7 @@ namespace Lucene.Net.Util
/// Constructs a <see cref="ByteSequencesReader"/> from the
provided <see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref
name="stream"/> is <c>null</c>.</exception>
public ByteSequencesReader(FileStream stream)
- : this(new BinaryReader(stream, DEFAULT_ENCODING, leaveOpen: false))
+ : this(new BinaryReader(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: false))
{
}
@@ -736,7 +730,7 @@ namespace Lucene.Net.Util
/// Constructs a <see cref="ByteSequencesReader"/> from the
provided <see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref
name="stream"/> is <c>null</c>.</exception>
public ByteSequencesReader(FileStream stream, bool leaveOpen)
- : this(new BinaryReader(stream, DEFAULT_ENCODING, leaveOpen))
+ : this(new BinaryReader(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen))
{
}
@@ -762,7 +756,7 @@ namespace Lucene.Net.Util
/// Constructs a <see cref="ByteSequencesReader"/> from the
provided <see cref="BinaryReader"/>.
/// <para/>
/// <b>NOTE:</b> To match Lucene, pass the <paramref
name="reader"/>'s constructor the
- /// <see cref="DEFAULT_ENCODING"/>, which is UTF-8 without a byte order mark.
+ /// <see cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>, which is UTF-8 without a byte order mark.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref
name="reader"/> is <c>null</c>.</exception>
public ByteSequencesReader(BinaryReader reader)
@@ -848,4 +842,4 @@ namespace Lucene.Net.Util
/// Returns the comparer in use to sort entries </summary>
public IComparer<BytesRef> Comparer => comparer;
}
-}
\ No newline at end of file
+}