This is an automated email from the ASF dual-hosted git repository. nightowl888 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit 31bbe9f352bab16b8809476314c4a1eba0b1de8f
Author: Shad Storhaug <[email protected]>
AuthorDate: Wed Oct 26 18:39:28 2022 +0700

    Lucene.Net.Util.TestUnicodeUtil::TestUTF8toUTF32(): Added additional tests for ICharSequence and char[] overloads, changed the original test to test string.
---
 src/Lucene.Net.Tests/Util/TestUnicodeUtil.cs | 82 ++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/src/Lucene.Net.Tests/Util/TestUnicodeUtil.cs b/src/Lucene.Net.Tests/Util/TestUnicodeUtil.cs
index fd1f8210f..27801bc99 100644
--- a/src/Lucene.Net.Tests/Util/TestUnicodeUtil.cs
+++ b/src/Lucene.Net.Tests/Util/TestUnicodeUtil.cs
@@ -1,4 +1,6 @@
 using J2N;
+using J2N.Text;
+using Lucene.Net.Attributes;
 using NUnit.Framework;
 using System;
 using Assert = Lucene.Net.TestFramework.Assert;
@@ -155,6 +157,86 @@ namespace Lucene.Net.Util
 
         [Test]
         public virtual void TestUTF8toUTF32()
+        {
+            BytesRef utf8 = new BytesRef(20);
+            Int32sRef utf32 = new Int32sRef(20);
+            int[] codePoints = new int[20];
+            int num = AtLeast(50000);
+            for (int i = 0; i < num; i++)
+            {
+                string s = TestUtil.RandomUnicodeString(Random);
+                UnicodeUtil.UTF16toUTF8(s, 0, s.Length, utf8);
+                UnicodeUtil.UTF8toUTF32(utf8, utf32);
+
+                int charUpto = 0;
+                int intUpto = 0;
+
+                while (charUpto < s.Length)
+                {
+                    int cp = Character.CodePointAt(s, charUpto);
+                    codePoints[intUpto++] = cp;
+                    charUpto += Character.CharCount(cp);
+                }
+                if (!ArrayUtil.Equals(codePoints, 0, utf32.Int32s, utf32.Offset, intUpto))
+                {
+                    Console.WriteLine("FAILED");
+                    for (int j = 0; j < s.Length; j++)
+                    {
+                        Console.WriteLine(" char[" + j + "]=" + ((int)s[j]).ToString("x"));
+                    }
+                    Console.WriteLine();
+                    Assert.AreEqual(intUpto, utf32.Length);
+                    for (int j = 0; j < intUpto; j++)
+                    {
+                        Console.WriteLine(" " + utf32.Int32s[j].ToString("x") + " vs " + codePoints[j].ToString("x"));
+                    }
+                    Assert.Fail("mismatch");
+                }
+            }
+        }
+
+        [Test, LuceneNetSpecific]
+        public virtual void TestUTF8toUTF32_ICharSequence()
+        {
+            BytesRef utf8 = new BytesRef(20);
+            Int32sRef utf32 = new Int32sRef(20);
+            int[] codePoints = new int[20];
+            int num = AtLeast(50000);
+            for (int i = 0; i < num; i++)
+            {
+                string s = TestUtil.RandomUnicodeString(Random);
+                UnicodeUtil.UTF16toUTF8(s.AsCharSequence(), 0, s.Length, utf8);
+                UnicodeUtil.UTF8toUTF32(utf8, utf32);
+
+                int charUpto = 0;
+                int intUpto = 0;
+
+                while (charUpto < s.Length)
+                {
+                    int cp = Character.CodePointAt(s, charUpto);
+                    codePoints[intUpto++] = cp;
+                    charUpto += Character.CharCount(cp);
+                }
+                if (!ArrayUtil.Equals(codePoints, 0, utf32.Int32s, utf32.Offset, intUpto))
+                {
+                    Console.WriteLine("FAILED");
+                    for (int j = 0; j < s.Length; j++)
+                    {
+                        Console.WriteLine(" char[" + j + "]=" + ((int)s[j]).ToString("x"));
+                    }
+                    Console.WriteLine();
+                    Assert.AreEqual(intUpto, utf32.Length);
+                    for (int j = 0; j < intUpto; j++)
+                    {
+                        Console.WriteLine(" " + utf32.Int32s[j].ToString("x") + " vs " + codePoints[j].ToString("x"));
+                    }
+                    Assert.Fail("mismatch");
+                }
+            }
+        }
+
+        [Test, LuceneNetSpecific]
+        public virtual void TestUTF8toUTF32_CharArray()
         {
             BytesRef utf8 = new BytesRef(20);
             Int32sRef utf32 = new Int32sRef(20);
