BUG: Lucene.Net.Core.Util.ArrayUtil: Need to call string.CompareOrdinal() from the NaturalComparer in order for it to sort to match Lucene. This fixes the Lucene.Net.Tests.Util.TestBytesRefArray.TestSort() and Lucene.Net.Tests.Util.TestBytesRefHash.TestSort() tests.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/f87315bb Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/f87315bb Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/f87315bb Branch: refs/heads/api-work Commit: f87315bba4469e854f4cb0d90c3d0174752bfeb5 Parents: 2a99f9e Author: Shad Storhaug <[email protected]> Authored: Sun Mar 5 14:19:21 2017 +0700 Committer: Shad Storhaug <[email protected]> Committed: Sun Mar 5 17:08:50 2017 +0700 ---------------------------------------------------------------------- src/Lucene.Net.Core/Util/ArrayUtil.cs | 87 ++++++++++++++++---- src/Lucene.Net.Core/Util/CollectionUtil.cs | 4 +- src/Lucene.Net.Tests/Util/TestBytesRefArray.cs | 7 +- src/Lucene.Net.Tests/Util/TestBytesRefHash.cs | 5 +- .../Util/TestInPlaceMergeSorter.cs | 2 +- src/Lucene.Net.Tests/Util/TestIntroSorter.cs | 2 +- src/Lucene.Net.Tests/Util/TestTimSorter.cs | 2 +- 7 files changed, 85 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f87315bb/src/Lucene.Net.Core/Util/ArrayUtil.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Util/ArrayUtil.cs b/src/Lucene.Net.Core/Util/ArrayUtil.cs index 2d9ba0c..39bcc95 100644 --- a/src/Lucene.Net.Core/Util/ArrayUtil.cs +++ b/src/Lucene.Net.Core/Util/ArrayUtil.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Diagnostics; +using System.Reflection; namespace Lucene.Net.Util { @@ -782,26 +783,80 @@ namespace Lucene.Net.Util return result; } - // LUCENENET specific - replaced NaturalComparer<T> with Comparer<T>.Default - //private class NaturalComparerImpl<T> : IComparer<T> where T : IComparable<T> - //{ - // internal NaturalComparerImpl() - // { - // } + // LUCENENET specific - we don't have an IComparable<T> constraint - + // the logic of 
GetNaturalComparer<T> handles that so we just + // do a cast here. + private class NaturalComparer<T> : IComparer<T> //where T : IComparable<T> + { + internal NaturalComparer() + { + } - // public virtual int Compare(T o1, T o2) - // { - // return o1.CompareTo(o2); - // } - //} + public virtual int Compare(T o1, T o2) + { + return ((IComparable<T>)o1).CompareTo(o2); + } + } /// <summary> - /// Get the natural <seealso cref="Comparer"/> for the provided object class. </summary> - public static IComparer<T> NaturalComparer<T>() + /// LUCENENET specific - we need custom handling for sorting strings the same + /// way they were sorted in Java. + /// </summary> + private class StringOrdinalComparer : IComparer<string> + { + internal StringOrdinalComparer() + { + } + + public virtual int Compare(string o1, string o2) + { + return string.CompareOrdinal(o1, o2); + } + } + + /// <summary> + /// A comparer that uses the <see cref="string.CompareOrdinal(string, string)"/> method to make + /// string comparisons, which provides a culture-insensitive sort operation (similar to Java's default + /// string comparison). + /// </summary> + public static readonly IComparer<string> STRING_ORDINAL_COMPARER = new StringOrdinalComparer(); + + /// <summary> + /// Get the natural <seealso cref="Comparer"/> for the provided object class. + /// <para/> + /// The comparer returned depends on the <typeparamref name="T"/> argument: + /// <list type="number"> + /// <item>If the type is <see cref="string"/>, the comparer returned uses + /// the <see cref="string.CompareOrdinal(string, string)"/> to make the comparison + /// to ensure that the current culture doesn't affect the results. This is the + /// default string comparison used in Java, and what Lucene's design depends on.</item> + /// <item>If the type implements <see cref="IComparable{T}"/>, the comparer uses + /// <see cref="IComparable{T}.CompareTo(T)"/> for the comparison. 
This allows + /// the use of types with custom comparison schemes.</item> + /// <item>If neither of the above conditions is true, will default to <see cref="Comparer{T}.Default"/>.</item> + /// </list> + /// <para/> + /// NOTE: This was naturalComparer() in Lucene + /// </summary> + public static IComparer<T> GetNaturalComparer<T>() //where T : IComparable<T> // LUCENENET specific: removing constraint because in .NET, it is not needed { + // LUCENENET specific - we need to ensure that strings are compared + // in a culture-insensitive manner. + if (typeof(T).Equals(typeof(string))) + { + return (IComparer<T>)STRING_ORDINAL_COMPARER; + } + // LUCENENET specific - Only return the NaturalComparer if the type + // implements IComparable<T>, otherwise use Comparer<T>.Default. + // This allows the comparison to be customized, but it is not mandatory + // to implement IComparable<T>. + else if (typeof(IComparable<T>).GetTypeInfo().IsAssignableFrom(typeof(T))) + { + return new NaturalComparer<T>(); + } + return Comparer<T>.Default; - //return (IComparer<T>)new NaturalComparerImpl<T>(); } /// <summary> @@ -849,7 +904,7 @@ namespace Lucene.Net.Util { return; } - IntroSort(a, fromIndex, toIndex, ArrayUtil.NaturalComparer<T>()); + IntroSort(a, fromIndex, toIndex, ArrayUtil.GetNaturalComparer<T>()); } /// <summary> @@ -897,7 +952,7 @@ namespace Lucene.Net.Util { return; } - TimSort(a, fromIndex, toIndex, ArrayUtil.NaturalComparer<T>()); + TimSort(a, fromIndex, toIndex, ArrayUtil.GetNaturalComparer<T>()); } /// <summary> http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f87315bb/src/Lucene.Net.Core/Util/CollectionUtil.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Core/Util/CollectionUtil.cs b/src/Lucene.Net.Core/Util/CollectionUtil.cs index d952d54..fd030d5 100644 --- a/src/Lucene.Net.Core/Util/CollectionUtil.cs +++ b/src/Lucene.Net.Core/Util/CollectionUtil.cs @@ -166,7 +166,7 @@ namespace Lucene.Net.Util { return; } - 
IntroSort(list, ArrayUtil.NaturalComparer<T>()); + IntroSort(list, ArrayUtil.GetNaturalComparer<T>()); } // Tim sorts: @@ -202,7 +202,7 @@ namespace Lucene.Net.Util { return; } - TimSort(list, ArrayUtil.NaturalComparer<T>()); + TimSort(list, ArrayUtil.GetNaturalComparer<T>()); } } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f87315bb/src/Lucene.Net.Tests/Util/TestBytesRefArray.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Util/TestBytesRefArray.cs b/src/Lucene.Net.Tests/Util/TestBytesRefArray.cs index dde2b9e..d56719d 100644 --- a/src/Lucene.Net.Tests/Util/TestBytesRefArray.cs +++ b/src/Lucene.Net.Tests/Util/TestBytesRefArray.cs @@ -1,4 +1,4 @@ -using Lucene.Net.Randomized.Generators; +using Lucene.Net.Randomized.Generators; using NUnit.Framework; using System; using System.Collections.Generic; @@ -97,7 +97,10 @@ namespace Lucene.Net.Util stringList.Add(randomRealisticUnicodeString); } - stringList.Sort(); + // LUCENENET NOTE: Must sort using ArrayUtil.GetNaturalComparer<T>() + // to ensure culture isn't taken into consideration during the sort, + // which will match the sort order of BytesRef.UTF8SortedAsUTF16Comparer. 
+ CollectionUtil.TimSort(stringList); #pragma warning disable 612, 618 IBytesRefIterator iter = list.GetIterator(BytesRef.UTF8SortedAsUTF16Comparer); #pragma warning restore 612, 618 http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f87315bb/src/Lucene.Net.Tests/Util/TestBytesRefHash.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Util/TestBytesRefHash.cs b/src/Lucene.Net.Tests/Util/TestBytesRefHash.cs index 696a011..4c9f5d8 100644 --- a/src/Lucene.Net.Tests/Util/TestBytesRefHash.cs +++ b/src/Lucene.Net.Tests/Util/TestBytesRefHash.cs @@ -200,7 +200,10 @@ namespace Lucene.Net.Util int num = AtLeast(2); for (int j = 0; j < num; j++) { - SortedSet<string> strings = new SortedSet<string>(); + // LUCENENET specific - to ensure sorting strings works the same in the SortedSet, + // we need to use the natural comparer from ArrayUtil, which compares strings the same + // way they are done in Java. + SortedSet<string> strings = new SortedSet<string>(ArrayUtil.GetNaturalComparer<string>()); for (int k = 0; k < 797; k++) { string str; http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f87315bb/src/Lucene.Net.Tests/Util/TestInPlaceMergeSorter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Util/TestInPlaceMergeSorter.cs b/src/Lucene.Net.Tests/Util/TestInPlaceMergeSorter.cs index 232aa94..9c78f4c 100644 --- a/src/Lucene.Net.Tests/Util/TestInPlaceMergeSorter.cs +++ b/src/Lucene.Net.Tests/Util/TestInPlaceMergeSorter.cs @@ -32,7 +32,7 @@ namespace Lucene.Net.Util public override Sorter NewSorter(Entry[] arr) { - return new ArrayInPlaceMergeSorter<Entry>(arr, ArrayUtil.NaturalComparer<Entry>()); + return new ArrayInPlaceMergeSorter<Entry>(arr, ArrayUtil.GetNaturalComparer<Entry>()); } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f87315bb/src/Lucene.Net.Tests/Util/TestIntroSorter.cs 
---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Util/TestIntroSorter.cs b/src/Lucene.Net.Tests/Util/TestIntroSorter.cs index 2261560..5105b75 100644 --- a/src/Lucene.Net.Tests/Util/TestIntroSorter.cs +++ b/src/Lucene.Net.Tests/Util/TestIntroSorter.cs @@ -28,7 +28,7 @@ namespace Lucene.Net.Util public override Sorter NewSorter(Entry[] arr) { - return new ArrayIntroSorter<Entry>(arr, ArrayUtil.NaturalComparer<Entry>()); + return new ArrayIntroSorter<Entry>(arr, ArrayUtil.GetNaturalComparer<Entry>()); } http://git-wip-us.apache.org/repos/asf/lucenenet/blob/f87315bb/src/Lucene.Net.Tests/Util/TestTimSorter.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Util/TestTimSorter.cs b/src/Lucene.Net.Tests/Util/TestTimSorter.cs index 6646d28..150ad16 100644 --- a/src/Lucene.Net.Tests/Util/TestTimSorter.cs +++ b/src/Lucene.Net.Tests/Util/TestTimSorter.cs @@ -28,7 +28,7 @@ namespace Lucene.Net.Util public override Sorter NewSorter(Entry[] arr) { - return new ArrayTimSorter<Entry>(arr, ArrayUtil.NaturalComparer<Entry>(), TestUtil.NextInt(Random(), 0, arr.Length)); + return new ArrayTimSorter<Entry>(arr, ArrayUtil.GetNaturalComparer<Entry>(), TestUtil.NextInt(Random(), 0, arr.Length)); }
