This is an automated email from the ASF dual-hosted git repository.
paulirwin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
The following commit(s) were added to refs/heads/master by this push:
new 12b8941fe BREAKING: Use BOM-less UTF-8 encoding for writes, #1027
(#1075)
12b8941fe is described below
commit 12b8941fe32539070f4b4cc9b8e0780b6f307e52
Author: Paul Irwin <[email protected]>
AuthorDate: Sun Jan 5 20:37:54 2025 -0700
BREAKING: Use BOM-less UTF-8 encoding for writes, #1027 (#1075)
* SWEEP: Use BOM-less UTF-8 encoding for writes, #1027
* Remove OfflineSorter.DEFAULT_ENCODING field and replace with
IOUtils.CHARSET_UTF_8
* Rename IOUtils.CHARSET_UTF_8 to ENCODING_UTF_8_NO_BOM
---
.../ByTask/Tasks/WriteEnwikiLineDocTask.cs | 7 +++--
.../ByTask/Tasks/WriteLineDocTask.cs | 9 +++---
.../Quality/Trec/QueryDriver.cs | 2 +-
src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs | 5 ++--
src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs | 3 +-
src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs | 22 +++++++--------
src/Lucene.Net.TestFramework/Util/TestUtil.cs | 4 +--
.../Analysis/Hunspell/Test64kAffixes.cs | 33 +++++++++++-----------
.../Analysis/Util/TestFilesystemResourceLoader.cs | 5 ++--
.../ByTask/Feeds/DocMakerTest.cs | 3 +-
.../ByTask/Feeds/LineDocSourceTest.cs | 13 +++++----
.../ByTask/Utils/StreamUtilsTest.cs | 19 +++++++------
src/Lucene.Net.Tests.Demo/TestDemo.cs | 6 ++--
.../Taxonomy/TestTaxonomyFacetCounts.cs | 2 +-
src/Lucene.Net.Tests/Index/TestCheckIndex.cs | 3 +-
.../Index/TestDocInverterPerFieldErrorInfo.cs | 5 ++--
.../Index/TestIndexWriterDelete.cs | 4 +--
src/Lucene.Net.Tests/Index/TestPayloads.cs | 6 ++--
src/Lucene.Net.Tests/Search/Spans/TestBasics.cs | 22 +++++++--------
src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs | 3 +-
src/Lucene.Net.Tests/Util/TestOfflineSorter.cs | 4 +--
.../Compressing/CompressingStoredFieldsReader.cs | 2 +-
.../Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs | 2 +-
.../Codecs/Lucene40/Lucene40StoredFieldsReader.cs | 2 +-
src/Lucene.Net/Index/Term.cs | 4 ++-
src/Lucene.Net/Support/StandardCharsets.cs | 6 +++-
src/Lucene.Net/Util/IOUtils.cs | 11 ++++++--
src/Lucene.Net/Util/OfflineSorter.cs | 28 ++++++++----------
28 files changed, 126 insertions(+), 109 deletions(-)
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs
b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs
index 7e61359b0..a12280f21 100644
--- a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteEnwikiLineDocTask.cs
@@ -2,6 +2,7 @@
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Documents;
using Lucene.Net.Index;
+using Lucene.Net.Support;
using System;
using System.IO;
using System.Text;
@@ -26,9 +27,9 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
*/
/// <summary>
- /// A <see cref="WriteLineDocTask"/> which for Wikipedia input, will write
category pages
+ /// A <see cref="WriteLineDocTask"/> which for Wikipedia input, will write
category pages
/// to another file, while remaining pages will be written to the original
file.
- /// The categories file is derived from the original file, by adding a
prefix "categories-".
+ /// The categories file is derived from the original file, by adding a
prefix "categories-".
/// </summary>
public class WriteEnwikiLineDocTask : WriteLineDocTask
{
@@ -38,7 +39,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
: base(runData)
{
Stream @out = StreamUtils.GetOutputStream(CategoriesLineFile(new
FileInfo(m_fname)));
- categoryLineFileOut = new StreamWriter(@out, Encoding.UTF8);
+ categoryLineFileOut = new StreamWriter(@out,
StandardCharsets.UTF_8);
WriteHeader(categoryLineFileOut);
}
diff --git a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs
b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs
index d724cfc51..ddb872797 100644
--- a/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs
+++ b/src/Lucene.Net.Benchmark/ByTask/Tasks/WriteLineDocTask.cs
@@ -3,6 +3,7 @@ using Lucene.Net.Benchmarks.ByTask.Feeds;
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Documents;
using Lucene.Net.Index;
+using Lucene.Net.Support;
using Lucene.Net.Support.Threading;
using Lucene.Net.Util;
using System;
@@ -49,8 +50,8 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
/// <item><term>line.file.out</term><description>the name of the file
to write the output to. That parameter is mandatory. <b>NOTE:</b> the file is
re-created.</description></item>
/// <item><term>line.fields</term><description>which fields should be
written in each line. (optional, default: <see
cref="DEFAULT_FIELDS"/>).</description></item>
/// <item><term>sufficient.fields</term><description>
- /// list of field names, separated by comma, which,
- /// if all of them are missing, the document will be skipped. For
example, to require
+ /// list of field names, separated by comma, which,
+ /// if all of them are missing, the document will be skipped. For
example, to require
/// that at least one of f1,f2 is not empty, specify: "f1,f2" in
this field. To specify
/// that no field is required, i.e. that even empty docs should be
emitted, specify <b>","</b>
/// (optional, default: <see cref="DEFAULT_SUFFICIENT_FIELDS"/>).
@@ -112,10 +113,10 @@ namespace Lucene.Net.Benchmarks.ByTask.Tasks
throw new ArgumentException("line.file.out must be set");
}
Stream @out = StreamUtils.GetOutputStream(new FileInfo(m_fname));
- m_lineFileOut = new StreamWriter(@out, Encoding.UTF8);
+ m_lineFileOut = new StreamWriter(@out, StandardCharsets.UTF_8);
docMaker = runData.DocMaker;
- // init fields
+ // init fields
string f2r = config.Get("line.fields", null);
if (f2r is null)
{
diff --git a/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
index b75eb55b5..d08dab75a 100644
--- a/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
+++ b/src/Lucene.Net.Benchmark/Quality/Trec/QueryDriver.cs
@@ -71,7 +71,7 @@ namespace Lucene.Net.Benchmarks.Quality.Trec
FileInfo topicsFile = new FileInfo(args[0]);
FileInfo qrelsFile = new FileInfo(args[1]);
- SubmissionReport submitLog = new SubmissionReport(new
StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write),
Encoding.UTF8 /* huh, no nio.Charset ctor? */), "lucene");
+ SubmissionReport submitLog = new SubmissionReport(new
StreamWriter(new FileStream(args[2], FileMode.Create, FileAccess.Write),
IOUtils.ENCODING_UTF_8_NO_BOM /* huh, no nio.Charset ctor? */), "lucene");
using Store.FSDirectory dir = Store.FSDirectory.Open(new
DirectoryInfo(args[3]));
using IndexReader reader = DirectoryReader.Open(dir);
string fieldSpec = args.Length == 5 ? args[4] : "T"; // default to
Title-only if not specified.
diff --git a/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs
b/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs
index be1f79aa2..8d0440326 100644
--- a/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs
+++ b/src/Lucene.Net.Benchmark/Utils/ExtractReuters.cs
@@ -1,4 +1,5 @@
-using System;
+using Lucene.Net.Support;
+using System;
using System.IO;
using System.Text;
using System.Text.RegularExpressions;
@@ -118,7 +119,7 @@ namespace Lucene.Net.Benchmarks.Utils
string outFile =
System.IO.Path.Combine(outputDir.FullName, sgmFile.Name + "-"
+ (docNumber++) + ".txt");
// System.out.println("Writing " + outFile);
- StreamWriter writer = new StreamWriter(new
FileStream(outFile, FileMode.Create, FileAccess.Write), Encoding.UTF8);
+ StreamWriter writer = new StreamWriter(new
FileStream(outFile, FileMode.Create, FileAccess.Write), StandardCharsets.UTF_8);
writer.Write(@out);
writer.Dispose();
outBuffer.Length = 0;
diff --git a/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs
b/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs
index 5504248a3..7a50f3fb5 100644
--- a/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs
+++ b/src/Lucene.Net.Benchmark/Utils/ExtractWikipedia.cs
@@ -1,6 +1,7 @@
using Lucene.Net.Benchmarks.ByTask.Feeds;
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Documents;
+using Lucene.Net.Support;
using System;
using System.Collections.Generic;
using System.Globalization;
@@ -88,7 +89,7 @@ namespace Lucene.Net.Benchmarks.Utils
try
{
- using TextWriter writer = new StreamWriter(new
FileStream(f.FullName, FileMode.Create, FileAccess.Write), Encoding.UTF8);
+ using TextWriter writer = new StreamWriter(new
FileStream(f.FullName, FileMode.Create, FileAccess.Write),
StandardCharsets.UTF_8);
writer.Write(contents.ToString());
}
catch (Exception ioe) when (ioe.IsIOException())
diff --git a/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
index e173c6f72..cc4917321 100644
--- a/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
+++ b/src/Lucene.Net.TestFramework/Util/Fst/FSTTester.cs
@@ -318,16 +318,16 @@ namespace Lucene.Net.Util.Fst
bool willRewrite = random.NextBoolean();
- Builder<T> builder = new Builder<T>(inputMode == 0 ?
FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
- prune1, prune2,
- prune1 == 0 && prune2 == 0,
- allowRandomSuffixSharing ?
random.NextBoolean() : true,
- allowRandomSuffixSharing ?
TestUtil.NextInt32(random, 1, 10) : int.MaxValue,
- outputs,
- null,
- willRewrite,
- PackedInt32s.DEFAULT,
- true,
+ Builder<T> builder = new Builder<T>(inputMode == 0 ?
FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
+ prune1, prune2,
+ prune1 == 0 && prune2 == 0,
+ allowRandomSuffixSharing ?
random.NextBoolean() : true,
+ allowRandomSuffixSharing ?
TestUtil.NextInt32(random, 1, 10) : int.MaxValue,
+ outputs,
+ null,
+ willRewrite,
+ PackedInt32s.DEFAULT,
+ true,
15);
if (LuceneTestCase.Verbose)
{
@@ -386,7 +386,7 @@ namespace Lucene.Net.Util.Fst
if (LuceneTestCase.Verbose && pairs.Count <= 20 && fst != null)
{
- using (TextWriter w = new StreamWriter(new
FileStream("out.dot", FileMode.OpenOrCreate), Encoding.UTF8))
+ using (TextWriter w = new StreamWriter(new
FileStream("out.dot", FileMode.OpenOrCreate), StandardCharsets.UTF_8))
{
Util.ToDot(fst, w, false, false);
}
diff --git a/src/Lucene.Net.TestFramework/Util/TestUtil.cs
b/src/Lucene.Net.TestFramework/Util/TestUtil.cs
index d270d069e..ad25bf418 100644
--- a/src/Lucene.Net.TestFramework/Util/TestUtil.cs
+++ b/src/Lucene.Net.TestFramework/Util/TestUtil.cs
@@ -167,7 +167,7 @@ namespace Lucene.Net.Util
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
checker.CrossCheckTermVectors = crossCheckTermVectors;
- checker.InfoStream = new StreamWriter(bos, Encoding.UTF8);
+ checker.InfoStream = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM);
CheckIndex.Status indexStatus = checker.DoCheckIndex(null);
if (indexStatus is null || indexStatus.Clean == false)
{
@@ -203,7 +203,7 @@ namespace Lucene.Net.Util
{
// LUCENENET: dispose the StreamWriter and ByteArrayOutputStream
when done
using ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
- using StreamWriter infoStream = new StreamWriter(bos,
Encoding.UTF8, leaveOpen: true, bufferSize: 1024);
+ using StreamWriter infoStream = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: true, bufferSize: 1024);
reader.CheckIntegrity();
CheckIndex.Status.FieldNormStatus fieldNormStatus =
Index.CheckIndex.TestFieldNorms(reader, infoStream);
diff --git
a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs
b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs
index e210ee9fd..ec4824a7c 100644
--- a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs
+++ b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Hunspell/Test64kAffixes.cs
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.10.4
using J2N;
+using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System.Collections.Generic;
@@ -35,25 +36,25 @@ namespace Lucene.Net.Analysis.Hunspell
FileInfo affix = new
FileInfo(System.IO.Path.Combine(tempDir.FullName, "64kaffixes.aff"));
FileInfo dict = new
FileInfo(System.IO.Path.Combine(tempDir.FullName, "64kaffixes.dic"));
- using var affixWriter = new StreamWriter(
- new FileStream(affix.FullName, FileMode.OpenOrCreate),
Encoding.UTF8);
-
- // 65k affixes with flag 1, then an affix with flag 2
- affixWriter.Write("SET UTF-8\nFLAG num\nSFX 1 Y 65536\n");
- for (int i = 0; i < 65536; i++)
+ using (var affixWriter = new StreamWriter(
+ new FileStream(affix.FullName, FileMode.OpenOrCreate),
StandardCharsets.UTF_8))
{
- affixWriter.Write("SFX 1 0 " + i.ToHexString() + " .\n");
- }
- affixWriter.Write("SFX 2 Y 1\nSFX 2 0 s\n");
- affixWriter.Dispose();
-
- using var dictWriter = new StreamWriter(
- new FileStream(dict.FullName, FileMode.OpenOrCreate),
Encoding.UTF8);
+ // 65k affixes with flag 1, then an affix with flag 2
+ affixWriter.Write("SET UTF-8\nFLAG num\nSFX 1 Y 65536\n");
+ for (int i = 0; i < 65536; i++)
+ {
+ affixWriter.Write("SFX 1 0 " + i.ToHexString() + " .\n");
+ }
+ affixWriter.Write("SFX 2 Y 1\nSFX 2 0 s\n");
+ } // affixWriter.Dispose();
- // drink signed with affix 2 (takes -s)
- dictWriter.Write("1\ndrink/2\n");
- dictWriter.Dispose();
+ using (var dictWriter = new StreamWriter(
+ new FileStream(dict.FullName, FileMode.OpenOrCreate),
StandardCharsets.UTF_8))
+ {
+ // drink signed with affix 2 (takes -s)
+ dictWriter.Write("1\ndrink/2\n");
+ } // dictWriter.Dispose();
using Stream affStream = new FileStream(affix.FullName,
FileMode.OpenOrCreate, FileAccess.ReadWrite);
using Stream dictStream = new FileStream(dict.FullName,
FileMode.OpenOrCreate, FileAccess.ReadWrite);
diff --git
a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs
b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs
index 06db2c28e..d8166892c 100644
---
a/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs
+++
b/src/Lucene.Net.Tests.Analysis.Common/Analysis/Util/TestFilesystemResourceLoader.cs
@@ -1,5 +1,6 @@
// Lucene version compatibility level 4.8.1
using J2N;
+using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
@@ -78,7 +79,7 @@ namespace Lucene.Net.Analysis.Util
DirectoryInfo @base = CreateTempDir("fsResourceLoaderBase");
try
{
- TextWriter os = new StreamWriter(new
FileStream(System.IO.Path.Combine(@base.FullName, "template.txt"),
FileMode.Create, FileAccess.Write), Encoding.UTF8);
+ TextWriter os = new StreamWriter(new
FileStream(System.IO.Path.Combine(@base.FullName, "template.txt"),
FileMode.Create, FileAccess.Write), StandardCharsets.UTF_8);
try
{
os.Write("foobar\n");
@@ -120,4 +121,4 @@ namespace Lucene.Net.Analysis.Util
assertEquals("foobar",
WordlistLoader.GetLines(rl.OpenResource("template.txt"),
Encoding.UTF8).First());
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs
b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs
index d8acb4f66..acd70715b 100644
--- a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs
+++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/DocMakerTest.cs
@@ -5,6 +5,7 @@ using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Support;
+using Lucene.Net.Util;
using NUnit.Framework;
using System.Collections.Generic;
using System.IO;
@@ -170,7 +171,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
// DocMaker did not close its ContentSource if resetInputs was
called twice,
// leading to a file handle leak.
FileInfo f = new FileInfo(Path.Combine(getWorkDir().FullName,
"docMakerLeak.txt"));
- TextWriter ps = new StreamWriter(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write), Encoding.UTF8);
+ TextWriter ps = new StreamWriter(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write), IOUtils.ENCODING_UTF_8_NO_BOM);
ps.WriteLine("one title\t" + (J2N.Time.NanoTime() /
J2N.Time.MillisecondsPerNanosecond) + "\tsome content"); // LUCENENET: Use
NanoTime() rather than CurrentTimeMilliseconds() for more accurate/reliable
results
ps.Dispose();
diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs
b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs
index c9b0cad90..ad909a49c 100644
--- a/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs
+++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Feeds/LineDocSourceTest.cs
@@ -4,6 +4,7 @@ using Lucene.Net.Benchmarks.ByTask.Tasks;
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.Index;
using Lucene.Net.Search;
+using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
@@ -41,7 +42,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
{
Stream @out = new FileStream(file.FullName, FileMode.Create,
FileAccess.Write);
@out = new BZip2OutputStream(@out); //
csFactory.createCompressorOutputStream("bzip2", @out);
- TextWriter writer = new StreamWriter(@out, Encoding.UTF8);
+ TextWriter writer = new StreamWriter(@out, StandardCharsets.UTF_8);
writeDocsToFile(writer, addHeader, null);
writer.Dispose();
}
@@ -59,7 +60,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
writer.Write(DocMaker.BODY_FIELD);
if (otherFields != null)
{
- // additional field names in the header
+ // additional field names in the header
foreach (Object fn in otherFields.Keys)
{
writer.Write(WriteLineDocTask.SEP);
@@ -72,7 +73,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
doc.append("title").append(WriteLineDocTask.SEP).append("date").append(WriteLineDocTask.SEP).append(DocMaker.BODY_FIELD);
if (otherFields != null)
{
- // additional field values in the doc line
+ // additional field values in the doc line
foreach (Object fv in otherFields.Values)
{
doc.append(WriteLineDocTask.SEP).append(fv.toString());
@@ -85,7 +86,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
private void createRegularLineFile(FileInfo file, bool addHeader)
{
Stream @out = new FileStream(file.FullName, FileMode.Create,
FileAccess.Write);
- TextWriter writer = new StreamWriter(@out, Encoding.UTF8);
+ TextWriter writer = new StreamWriter(@out, StandardCharsets.UTF_8);
writeDocsToFile(writer, addHeader, null);
writer.Dispose();
}
@@ -93,7 +94,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
private void createRegularLineFileWithMoreFields(FileInfo file, params
String[] extraFields)
{
Stream @out = new FileStream(file.FullName, FileMode.Create,
FileAccess.Write);
- TextWriter writer = new StreamWriter(@out, Encoding.UTF8);
+ TextWriter writer = new StreamWriter(@out, StandardCharsets.UTF_8);
Dictionary<string, string> p = new Dictionary<string, string>();
foreach (String f in extraFields)
{
@@ -231,7 +232,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
for (int i = 0; i < testCases.Length; i++)
{
FileInfo file = new
FileInfo(Path.Combine(getWorkDir().FullName, "one-line"));
- TextWriter writer = new StreamWriter(new
FileStream(file.FullName, FileMode.Create, FileAccess.Write), Encoding.UTF8);
+ TextWriter writer = new StreamWriter(new
FileStream(file.FullName, FileMode.Create, FileAccess.Write),
StandardCharsets.UTF_8);
writer.Write(testCases[i]);
writer.WriteLine();
writer.Dispose();
diff --git a/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs
b/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs
index eb2aaec78..d8e175f74 100644
--- a/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs
+++ b/src/Lucene.Net.Tests.Benchmark/ByTask/Utils/StreamUtilsTest.cs
@@ -1,4 +1,5 @@
using ICSharpCode.SharpZipLib.BZip2;
+using Lucene.Net.Support;
using Lucene.Net.Util;
using NUnit.Framework;
using System;
@@ -27,7 +28,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
public class StreamUtilsTest : BenchmarkTestCase
{
- private static readonly String TEXT = "Some-Text...";
+ private static readonly string TEXT = "Some-Text...";
private DirectoryInfo testDir;
[Test]
@@ -82,15 +83,15 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
assertReadText(autoOutFile("TEXT"));
}
- private FileInfo rawTextFile(String ext)
+ private FileInfo rawTextFile(string ext)
{
FileInfo f = new FileInfo(Path.Combine(testDir.FullName,
"testfile." + ext));
- using (TextWriter w = new StreamWriter(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write), Encoding.UTF8))
+ using (TextWriter w = new StreamWriter(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write), StandardCharsets.UTF_8))
w.WriteLine(TEXT);
return f;
}
- private FileInfo rawGzipFile(String ext)
+ private FileInfo rawGzipFile(string ext)
{
FileInfo f = new FileInfo(Path.Combine(testDir.FullName,
"testfile." + ext));
using (Stream os = new GZipStream(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write), CompressionMode.Compress)) //new
CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.GZIP,
new FileOutputStream(f));
@@ -98,7 +99,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
return f;
}
- private FileInfo rawBzip2File(String ext)
+ private FileInfo rawBzip2File(string ext)
{
FileInfo f = new FileInfo(Path.Combine(testDir.FullName,
"testfile." + ext));
Stream os = new BZip2OutputStream(new FileStream(f.FullName,
FileMode.Create, FileAccess.Write)); // new
CompressorStreamFactory().createCompressorOutputStream(CompressorStreamFactory.BZIP2,
new FileOutputStream(f));
@@ -106,7 +107,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
return f;
}
- private FileInfo autoOutFile(String ext)
+ private FileInfo autoOutFile(string ext)
{
FileInfo f = new FileInfo(Path.Combine(testDir.FullName,
"testfile." + ext));
Stream os = StreamUtils.GetOutputStream(f);
@@ -116,7 +117,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
private void writeText(Stream os)
{
- TextWriter w = new StreamWriter(os, Encoding.UTF8);
+ TextWriter w = new StreamWriter(os, StandardCharsets.UTF_8);
w.WriteLine(TEXT);
w.Dispose();
}
@@ -124,8 +125,8 @@ namespace Lucene.Net.Benchmarks.ByTask.Utils
private void assertReadText(FileInfo f)
{
Stream ir = StreamUtils.GetInputStream(f);
- TextReader r = new StreamReader(ir, Encoding.UTF8);
- String line = r.ReadLine();
+ TextReader r = new StreamReader(ir, StandardCharsets.UTF_8);
+ string line = r.ReadLine();
assertEquals("Wrong text found in " + f.Name, TEXT, line);
r.Dispose();
}
diff --git a/src/Lucene.Net.Tests.Demo/TestDemo.cs
b/src/Lucene.Net.Tests.Demo/TestDemo.cs
index 356405bc0..16b2379a5 100644
--- a/src/Lucene.Net.Tests.Demo/TestDemo.cs
+++ b/src/Lucene.Net.Tests.Demo/TestDemo.cs
@@ -37,7 +37,7 @@ namespace Lucene.Net.Demo
var fakeSystemOut = new StreamWriter(bytes,
Encoding.GetEncoding(0));
Console.SetOut(fakeSystemOut);
// LUCENENET specific: changed the arguments to act more like
the dotnet.exe commands.
- // * only optional arguments start with -
+ // * only optional arguments start with -
// * options have a long form that starts with --
// * required arguments must be supplied without - or -- and
in a specific order
// Since the demo is meant to be seen by end users, these
changes were necessary to make
@@ -80,13 +80,13 @@ namespace Lucene.Net.Demo
DirectoryInfo indexDir = CreateTempDir("DemoTest");
// LUCENENET specific: changed the arguments to act more like the
dotnet.exe commands.
- // * only optional arguments start with -
+ // * only optional arguments start with -
// * options have a long form that starts with --
// * required arguments must be supplied without - or -- and in a
specific order
// Since the demo is meant to be seen by end users, these changes
were necessary to make
// it consistent with the lucene-cli utility.
// NOTE: There is no -create in lucene, but it has the same effect
as if --update were left out
- IndexFiles.Main(new string[] { indexDir.FullName,
filesDir.FullName });
+ IndexFiles.Main(new string[] { indexDir.FullName,
filesDir.FullName });
//IndexFiles.Main(new string[] { "-create", "-docs",
filesDir.FullName, "-index", indexDir.FullName });
TestOneSearch(indexDir, "apache", 3);
TestOneSearch(indexDir, "patent", 8);
diff --git a/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
b/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
index 206ef6f1b..02f8764f7 100644
--- a/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
+++ b/src/Lucene.Net.Tests.Facet/Taxonomy/TestTaxonomyFacetCounts.cs
@@ -134,7 +134,7 @@ namespace Lucene.Net.Facet.Taxonomy
string result;
using (ByteArrayOutputStream bos = new ByteArrayOutputStream())
{
- using (StreamWriter w = new StreamWriter(bos, Encoding.UTF8,
2048, true) { AutoFlush = true })
+ using (StreamWriter w = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM, 2048, true) { AutoFlush = true })
{
PrintTaxonomyStats.PrintStats(taxoReader, w, true);
}
diff --git a/src/Lucene.Net.Tests/Index/TestCheckIndex.cs
b/src/Lucene.Net.Tests/Index/TestCheckIndex.cs
index bb1c3a7ad..443ea738c 100644
--- a/src/Lucene.Net.Tests/Index/TestCheckIndex.cs
+++ b/src/Lucene.Net.Tests/Index/TestCheckIndex.cs
@@ -1,6 +1,7 @@
using Lucene.Net.Documents;
using Lucene.Net.Index.Extensions;
using Lucene.Net.Support.IO;
+using Lucene.Net.Util;
using NUnit.Framework;
using System.Collections.Generic;
using System.IO;
@@ -63,7 +64,7 @@ namespace Lucene.Net.Index
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
CheckIndex checker = new CheckIndex(dir);
- checker.InfoStream = new StreamWriter(bos, Encoding.UTF8);
+ checker.InfoStream = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM);
if (Verbose)
{
checker.InfoStream = Console.Out;
diff --git a/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs
b/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs
index bca4cfb3a..130b175f9 100644
--- a/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs
+++ b/src/Lucene.Net.Tests/Index/TestDocInverterPerFieldErrorInfo.cs
@@ -1,6 +1,7 @@
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Support.IO;
+using Lucene.Net.Util;
using NUnit.Framework;
using System;
using System.IO;
@@ -90,7 +91,7 @@ namespace Lucene.Net.Index
IndexWriter writer;
IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT,
new ThrowingAnalyzer());
ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
- StreamWriter infoPrintStream = new StreamWriter(infoBytes,
Encoding.UTF8);
+ StreamWriter infoPrintStream = new StreamWriter(infoBytes,
IOUtils.ENCODING_UTF_8_NO_BOM);
TextWriterInfoStream printStreamInfoStream = new
TextWriterInfoStream(infoPrintStream);
c.SetInfoStream(printStreamInfoStream);
writer = new IndexWriter(dir, c);
@@ -119,7 +120,7 @@ namespace Lucene.Net.Index
IndexWriter writer;
IndexWriterConfig c = new IndexWriterConfig(TEST_VERSION_CURRENT,
new ThrowingAnalyzer());
ByteArrayOutputStream infoBytes = new ByteArrayOutputStream();
- StreamWriter infoPrintStream = new StreamWriter(infoBytes,
Encoding.UTF8);
+ StreamWriter infoPrintStream = new StreamWriter(infoBytes,
IOUtils.ENCODING_UTF_8_NO_BOM);
TextWriterInfoStream printStreamInfoStream = new
TextWriterInfoStream(infoPrintStream);
c.SetInfoStream(printStreamInfoStream);
writer = new IndexWriter(dir, c);
diff --git a/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs
b/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs
index a64e9af0d..525750e98 100644
--- a/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs
+++ b/src/Lucene.Net.Tests/Index/TestIndexWriterDelete.cs
@@ -1400,7 +1400,7 @@ namespace Lucene.Net.Index
ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
//MemoryStream bos = new MemoryStream(1024);
CheckIndex checker = new CheckIndex(dir);
- checker.InfoStream = new StreamWriter(bos, Encoding.UTF8);
+ checker.InfoStream = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM);
CheckIndex.Status indexStatus = checker.DoCheckIndex(null);
Assert.IsTrue(indexStatus.Clean);
checker.FlushInfoStream();
@@ -1413,7 +1413,7 @@ namespace Lucene.Net.Index
w.Dispose();
bos = new ByteArrayOutputStream(1024);
- checker.InfoStream = new StreamWriter(bos, Encoding.UTF8);
+ checker.InfoStream = new StreamWriter(bos,
IOUtils.ENCODING_UTF_8_NO_BOM);
indexStatus = checker.DoCheckIndex(null);
Assert.IsTrue(indexStatus.Clean);
checker.FlushInfoStream();
diff --git a/src/Lucene.Net.Tests/Index/TestPayloads.cs
b/src/Lucene.Net.Tests/Index/TestPayloads.cs
index e35cc6aa5..1affe6ba5 100644
--- a/src/Lucene.Net.Tests/Index/TestPayloads.cs
+++ b/src/Lucene.Net.Tests/Index/TestPayloads.cs
@@ -84,7 +84,7 @@ namespace Lucene.Net.Index
// enabled in only some documents
d.Add(NewTextField("f3", "this field has payloads in some docs",
Field.Store.NO));
// only add payload data for field f2
- analyzer.SetPayloadData("f2",
"somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
+ analyzer.SetPayloadData("f2",
"somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 1);
writer.AddDocument(d);
// flush
writer.Dispose();
@@ -106,8 +106,8 @@ namespace Lucene.Net.Index
d.Add(NewTextField("f2", "this field has payloads in all docs",
Field.Store.NO));
d.Add(NewTextField("f3", "this field has payloads in some docs",
Field.Store.NO));
// add payload data for field f2 and f3
- analyzer.SetPayloadData("f2",
"somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 1);
- analyzer.SetPayloadData("f3",
"somedata".GetBytes(IOUtils.CHARSET_UTF_8), 0, 3);
+ analyzer.SetPayloadData("f2",
"somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 1);
+ analyzer.SetPayloadData("f3",
"somedata".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM), 0, 3);
writer.AddDocument(d);
// force merge
diff --git a/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs
b/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs
index d54678ae9..0e08ed67f 100644
--- a/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs
+++ b/src/Lucene.Net.Tests/Search/Spans/TestBasics.cs
@@ -77,7 +77,7 @@ namespace Lucene.Net.Search.Spans
if (m_input.IncrementToken())
{
#pragma warning disable 612, 618
- payloadAttr.Payload = new BytesRef(("pos: " +
pos).GetBytes(IOUtils.CHARSET_UTF_8));
+ payloadAttr.Payload = new BytesRef(("pos: " +
pos).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
pos++;
return true;
@@ -533,7 +533,7 @@ namespace Lucene.Net.Search.Spans
{
SpanTermQuery term1 = new SpanTermQuery(new Term("field", "five"));
#pragma warning disable 612, 618
- BytesRef pay = new BytesRef(("pos: " +
5).GetBytes(IOUtils.CHARSET_UTF_8));
+ BytesRef pay = new BytesRef(("pos: " +
5).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
SpanQuery query = new SpanPayloadCheckQuery(term1, new
JCG.List<byte[]>() { pay.Bytes });
CheckHits(query, new int[] { 1125, 1135, 1145, 1155, 1165, 1175,
1185, 1195, 1225, 1235, 1245, 1255, 1265, 1275, 1285, 1295, 1325, 1335, 1345,
1355, 1365, 1375, 1385, 1395, 1425, 1435, 1445, 1455, 1465, 1475, 1485, 1495,
1525, 1535, 1545, 1555, 1565, 1575, 1585, 1595, 1625, 1635, 1645, 1655, 1665,
1675, 1685, 1695, 1725, 1735, 1745, 1755, 1765, 1775, 1785, 1795, 1825, 1835,
1845, 1855, 1865, 1875, 1885, 1895, 1925, 1935, 1945, 1955, 1965, 1975, 1985,
1995 });
@@ -549,8 +549,8 @@ namespace Lucene.Net.Search.Spans
clauses[1] = term2;
snq = new SpanNearQuery(clauses, 0, true);
#pragma warning disable 612, 618
- pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
- pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
+ pay = new BytesRef(("pos: " +
0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ pay2 = new BytesRef(("pos: " +
1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
list = new JCG.List<byte[]>();
list.Add(pay.Bytes);
@@ -563,9 +563,9 @@ namespace Lucene.Net.Search.Spans
clauses[2] = new SpanTermQuery(new Term("field", "five"));
snq = new SpanNearQuery(clauses, 0, true);
#pragma warning disable 612, 618
- pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
- pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
- BytesRef pay3 = new BytesRef(("pos: " +
2).GetBytes(IOUtils.CHARSET_UTF_8));
+ pay = new BytesRef(("pos: " +
0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ pay2 = new BytesRef(("pos: " +
1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ BytesRef pay3 = new BytesRef(("pos: " +
2).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
list = new JCG.List<byte[]>();
list.Add(pay.Bytes);
@@ -597,10 +597,10 @@ namespace Lucene.Net.Search.Spans
var payloads = new JCG.List<byte[]>();
#pragma warning disable 612, 618
- BytesRef pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.CHARSET_UTF_8));
- BytesRef pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.CHARSET_UTF_8));
- BytesRef pay3 = new BytesRef(("pos: " + 3).GetBytes(IOUtils.CHARSET_UTF_8));
- BytesRef pay4 = new BytesRef(("pos: " + 4).GetBytes(IOUtils.CHARSET_UTF_8));
+ BytesRef pay = new BytesRef(("pos: " + 0).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ BytesRef pay2 = new BytesRef(("pos: " + 1).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ BytesRef pay3 = new BytesRef(("pos: " + 3).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
+ BytesRef pay4 = new BytesRef(("pos: " + 4).GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM));
#pragma warning restore 612, 618
payloads.Add(pay.Bytes);
payloads.Add(pay2.Bytes);
diff --git a/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs
b/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs
index f21e68457..e9de419c3 100644
--- a/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs
+++ b/src/Lucene.Net.Tests/Util/Fst/TestFSTs.cs
@@ -2,6 +2,7 @@
using J2N.Threading.Atomic;
using Lucene.Net.Diagnostics;
using Lucene.Net.Index.Extensions;
+using Lucene.Net.Support;
using Lucene.Net.Util.Automaton;
using NUnit.Framework;
using RandomizedTesting.Generators;
@@ -589,7 +590,7 @@ namespace Lucene.Net.Util.Fst
Console.WriteLine(ord + " terms; " + fst.NodeCount + "
nodes; " + fst.ArcCount + " arcs; " + fst.ArcWithOutputCount + " arcs w/
output; tot size " + fst.GetSizeInBytes());
if (fst.NodeCount < 100)
{
- TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.Create), Encoding.UTF8);
+ TextWriter w = new StreamWriter(new FileStream("out.dot", FileMode.Create), StandardCharsets.UTF_8);
Util.ToDot(fst, w, false, false);
w.Dispose();
Console.WriteLine("Wrote FST to out.dot");
diff --git a/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
b/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
index 3f9ae8929..19a3cc20d 100644
--- a/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
+++ b/src/Lucene.Net.Tests/Util/TestOfflineSorter.cs
@@ -86,7 +86,7 @@ namespace Lucene.Net.Util
public virtual void TestSingleLine()
{
#pragma warning disable 612, 618
- CheckSort(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) });
+ CheckSort(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM) });
#pragma warning restore 612, 618
}
@@ -95,7 +95,7 @@ namespace Lucene.Net.Util
public virtual void TestSingleLine_AsStream()
{
#pragma warning disable 612, 618
- CheckSortAsStream(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.CHARSET_UTF_8) });
+ CheckSortAsStream(new OfflineSorter(), new byte[][] { "Single line only.".GetBytes(IOUtils.ENCODING_UTF_8_NO_BOM) });
#pragma warning restore 612, 618
}
diff --git a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs
b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs
index 14fd869e2..16b053f4d 100644
--- a/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs
+++ b/src/Lucene.Net/Codecs/Compressing/CompressingStoredFieldsReader.cs
@@ -205,7 +205,7 @@ namespace Lucene.Net.Codecs.Compressing
data = new byte[length];
@in.ReadBytes(data, 0, length);
#pragma warning disable 612, 618
- visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(data));
+ visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(data));
#pragma warning restore 612, 618
break;
diff --git a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs
b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs
index 442a08e71..8615bb283 100644
--- a/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs
+++ b/src/Lucene.Net/Codecs/Lucene3x/Lucene3xStoredFieldsReader.cs
@@ -334,7 +334,7 @@ namespace Lucene.Net.Codecs.Lucene3x
}
else
{
- visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(bytes));
+ visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(bytes));
}
}
}
diff --git a/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs
b/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs
index b6736feb2..d21f35871 100644
--- a/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs
+++ b/src/Lucene.Net/Codecs/Lucene40/Lucene40StoredFieldsReader.cs
@@ -239,7 +239,7 @@ namespace Lucene.Net.Codecs.Lucene40
else
{
#pragma warning disable 612, 618
- visitor.StringField(info, IOUtils.CHARSET_UTF_8.GetString(bytes));
+ visitor.StringField(info, IOUtils.ENCODING_UTF_8_NO_BOM.GetString(bytes));
#pragma warning restore 612, 618
}
}
diff --git a/src/Lucene.Net/Index/Term.cs b/src/Lucene.Net/Index/Term.cs
index 6e245af2a..38eda37a3 100644
--- a/src/Lucene.Net/Index/Term.cs
+++ b/src/Lucene.Net/Index/Term.cs
@@ -1,4 +1,5 @@
using J2N.Text;
+using Lucene.Net.Support;
using System;
using System.Text;
@@ -90,7 +91,8 @@ namespace Lucene.Net.Index
public static string ToString(BytesRef termText)
{
// the term might not be text, but usually is. so we make a best
effort
- Encoding decoder = new UTF8Encoding(false, true);
+ // LUCENENET TODO: determine if we should use DecoderFallback.ExceptionFallback here
+ Encoding decoder = StandardCharsets.UTF_8;
try
{
return decoder.GetString(termText.Bytes, termText.Offset,
termText.Length);
diff --git a/src/Lucene.Net/Support/StandardCharsets.cs
b/src/Lucene.Net/Support/StandardCharsets.cs
index 781f3b3ae..73ef0f909 100644
--- a/src/Lucene.Net/Support/StandardCharsets.cs
+++ b/src/Lucene.Net/Support/StandardCharsets.cs
@@ -22,5 +22,9 @@ namespace Lucene.Net.Support;
internal static class StandardCharsets
{
- public static readonly Encoding UTF_8 = IOUtils.CHARSET_UTF_8;
+ /// <inheritdoc cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>
+ /// <remarks>
+ /// This is a convenience reference to <see cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>.
+ /// </remarks>
+ public static readonly Encoding UTF_8 = IOUtils.ENCODING_UTF_8_NO_BOM;
}
diff --git a/src/Lucene.Net/Util/IOUtils.cs b/src/Lucene.Net/Util/IOUtils.cs
index dc17cdbdd..c3141b00d 100644
--- a/src/Lucene.Net/Util/IOUtils.cs
+++ b/src/Lucene.Net/Util/IOUtils.cs
@@ -45,16 +45,21 @@ namespace Lucene.Net.Util
/// UTF-8 <see cref="Encoding"/> instance to prevent repeated
/// <see cref="Encoding.UTF8"/> lookups and match Java's behavior
/// with respect to a lack of a byte-order mark (BOM).
+ /// <para />
+ /// It is important to use this encoding over <see cref="Encoding.UTF8"/>
+ /// particularly when writing data, to ensure that the BOM is not written.
+ /// For reading data, either this or <see cref="Encoding.UTF8"/> can be used,
+ /// as both will correctly interpret data with or without a BOM.
/// </summary>
- public static readonly Encoding CHARSET_UTF_8 = new UTF8Encoding(
+ public static readonly Encoding ENCODING_UTF_8_NO_BOM = new UTF8Encoding(
encoderShouldEmitUTF8Identifier: false,
throwOnInvalidBytes: true);
/// <summary>
/// UTF-8 charset string.
- /// <para/>Where possible, use <see cref="Encoding.UTF8"/> instead,
+ /// <para/>Where possible, use <see cref="ENCODING_UTF_8_NO_BOM"/>
instead,
/// as using the <see cref="string"/> constant may slow things down.
</summary>
- /// <seealso cref="Encoding.UTF8"/>
+ /// <seealso cref="ENCODING_UTF_8_NO_BOM"/>
public static readonly string UTF_8 = "UTF-8";
/// <summary>
diff --git a/src/Lucene.Net/Util/OfflineSorter.cs
b/src/Lucene.Net/Util/OfflineSorter.cs
index c1114b18c..968368ca2 100644
--- a/src/Lucene.Net/Util/OfflineSorter.cs
+++ b/src/Lucene.Net/Util/OfflineSorter.cs
@@ -41,12 +41,6 @@ namespace Lucene.Net.Util
/// </summary>
public sealed class OfflineSorter
{
- /// <summary>
- /// The default encoding (UTF-8 without a byte order mark) used by <see cref="ByteSequencesReader"/> and <see cref="ByteSequencesWriter"/>.
- /// This encoding should always be used when calling the constructor overloads that accept <see cref="BinaryReader"/> or <see cref="BinaryWriter"/>.
- /// </summary>
- public static readonly Encoding DEFAULT_ENCODING = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false);
-
/// <summary>
/// The recommended buffer size to use on <see cref="Sort(FileStream,
FileStream)"/> or when creating a
/// <see cref="ByteSequencesReader"/> and <see
cref="ByteSequencesWriter"/>.
@@ -196,10 +190,10 @@ namespace Lucene.Net.Util
/// </summary>
public override string ToString()
{
- return string.Format(CultureInfo.InvariantCulture,
- "time={0:0.00} sec. total ({1:0.00} reading, {2:0.00}
sorting, {3:0.00} merging), lines={4}, temp files={5}, merges={6}, soft ram
limit={7:0.00} MB",
- TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime /
1000.0d, MergeTime / 1000.0d,
- Lines, TempMergeFiles, MergeRounds,
+ return string.Format(CultureInfo.InvariantCulture,
+ "time={0:0.00} sec. total ({1:0.00} reading, {2:0.00}
sorting, {3:0.00} merging), lines={4}, temp files={5}, merges={6}, soft ram
limit={7:0.00} MB",
+ TotalTime / 1000.0d, ReadTime / 1000.0d, SortTime /
1000.0d, MergeTime / 1000.0d,
+ Lines, TempMergeFiles, MergeRounds,
(double)BufferSize / MB);
}
}
@@ -606,7 +600,7 @@ namespace Lucene.Net.Util
/// Constructs a <see cref="ByteSequencesWriter"/> to the provided
<see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref
name="stream"/> is <c>null</c>.</exception>
public ByteSequencesWriter(FileStream stream)
- : this(new BinaryWriter(stream, DEFAULT_ENCODING, leaveOpen: false))
+ : this(new BinaryWriter(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: false))
{
}
@@ -614,7 +608,7 @@ namespace Lucene.Net.Util
/// Constructs a <see cref="ByteSequencesWriter"/> to the provided
<see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref
name="stream"/> is <c>null</c>.</exception>
public ByteSequencesWriter(FileStream stream, bool leaveOpen)
- : this(new BinaryWriter(stream, DEFAULT_ENCODING, leaveOpen))
+ : this(new BinaryWriter(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen))
{
}
@@ -638,7 +632,7 @@ namespace Lucene.Net.Util
/// <summary>
/// Constructs a <see cref="ByteSequencesWriter"/> to the provided
<see cref="BinaryWriter"/>.
/// <b>NOTE:</b> To match Lucene, pass the <paramref
name="writer"/>'s constructor the
- /// <see cref="DEFAULT_ENCODING"/>, which is UTF-8 without a byte order mark.
+ /// <see cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>, which is UTF-8 without a byte order mark.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref
name="writer"/> is <c>null</c>.</exception>
public ByteSequencesWriter(BinaryWriter writer)
@@ -728,7 +722,7 @@ namespace Lucene.Net.Util
/// Constructs a <see cref="ByteSequencesReader"/> from the
provided <see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref
name="stream"/> is <c>null</c>.</exception>
public ByteSequencesReader(FileStream stream)
- : this(new BinaryReader(stream, DEFAULT_ENCODING, leaveOpen: false))
+ : this(new BinaryReader(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen: false))
{
}
@@ -736,7 +730,7 @@ namespace Lucene.Net.Util
/// Constructs a <see cref="ByteSequencesReader"/> from the
provided <see cref="FileStream"/>. </summary>
/// <exception cref="ArgumentNullException"><paramref
name="stream"/> is <c>null</c>.</exception>
public ByteSequencesReader(FileStream stream, bool leaveOpen)
- : this(new BinaryReader(stream, DEFAULT_ENCODING, leaveOpen))
+ : this(new BinaryReader(stream, IOUtils.ENCODING_UTF_8_NO_BOM, leaveOpen))
{
}
@@ -762,7 +756,7 @@ namespace Lucene.Net.Util
/// Constructs a <see cref="ByteSequencesReader"/> from the
provided <see cref="BinaryReader"/>.
/// <para/>
/// <b>NOTE:</b> To match Lucene, pass the <paramref
name="reader"/>'s constructor the
- /// <see cref="DEFAULT_ENCODING"/>, which is UTF-8 without a byte order mark.
+ /// <see cref="IOUtils.ENCODING_UTF_8_NO_BOM"/>, which is UTF-8 without a byte order mark.
/// </summary>
/// <exception cref="ArgumentNullException"><paramref
name="reader"/> is <c>null</c>.</exception>
public ByteSequencesReader(BinaryReader reader)
@@ -848,4 +842,4 @@ namespace Lucene.Net.Util
/// Returns the comparer in use to sort entries </summary>
public IComparer<BytesRef> Comparer => comparer;
}
-}
\ No newline at end of file
+}