This is an automated email from the ASF dual-hosted git repository. nightowl888 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit 278fabe02b3806ad4cd4dda27657b0bc4b6d31a1 Author: Shad Storhaug <[email protected]> AuthorDate: Sat Aug 29 23:21:30 2020 +0700 Lucene.Net.Util.MergedIterator<T>: Converted to MergedEnumerator<T> (see #279) --- .../VectorHighlight/FieldPhraseList.cs | 14 +- src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs | 4 +- src/Lucene.Net.Tests/Util/TestMergedIterator.cs | 175 ++++++++++++++++++++- src/Lucene.Net/Index/CoalescedUpdates.cs | 2 +- src/Lucene.Net/Index/MultiFields.cs | 2 +- src/Lucene.Net/Util/MergedIterator.cs | 169 +++++++++++++++++++- 6 files changed, 348 insertions(+), 18 deletions(-) diff --git a/src/Lucene.Net.Highlighter/VectorHighlight/FieldPhraseList.cs b/src/Lucene.Net.Highlighter/VectorHighlight/FieldPhraseList.cs index 7ae39ae..7973e11 100644 --- a/src/Lucene.Net.Highlighter/VectorHighlight/FieldPhraseList.cs +++ b/src/Lucene.Net.Highlighter/VectorHighlight/FieldPhraseList.cs @@ -153,7 +153,7 @@ namespace Lucene.Net.Search.VectorHighlight { allInfos[index++] = fplToMerge.phraseList.GetEnumerator(); } - using (MergedIterator<WeightedPhraseInfo> itr = new MergedIterator<WeightedPhraseInfo>(false, allInfos)) + using (MergedEnumerator<WeightedPhraseInfo> itr = new MergedEnumerator<WeightedPhraseInfo>(false, allInfos)) { // Step 2. Walk the sorted list merging infos that overlap phraseList = new List<WeightedPhraseInfo>(); @@ -202,10 +202,7 @@ namespace Lucene.Net.Search.VectorHighlight } finally { - foreach (var allInfo in allInfos) - { - allInfo.Dispose(); - } + IOUtils.Dispose(allInfos); } } @@ -336,7 +333,7 @@ namespace Lucene.Net.Search.VectorHighlight } // Step 2. Walk the sorted list merging overlaps - using (MergedIterator<Toffs> itr = new MergedIterator<Toffs>(false, allToffs)) + using (MergedEnumerator<Toffs> itr = new MergedEnumerator<Toffs>(false, allToffs)) { termsOffsets = new List<Toffs>(); if (!itr.MoveNext()) @@ -363,10 +360,7 @@ namespace Lucene.Net.Search.VectorHighlight } finally { - foreach (var allToff in allToffs) - { - allToff.Dispose(); - } + IOUtils.Dispose(allToffs); } } diff --git a/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs b/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs index 770c8ab..c8427f8 100644 --- a/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs +++ b/src/Lucene.Net.Tests/Index/TestPrefixCodedTerms.cs @@ -89,7 +89,7 @@ namespace Lucene.Net.Index b2.Add(t2); PrefixCodedTerms pb2 = b2.Finish(); - IEnumerator<Term> merged = new MergedIterator<Term>(pb1.GetEnumerator(), pb2.GetEnumerator()); + IEnumerator<Term> merged = new MergedEnumerator<Term>(pb1.GetEnumerator(), pb2.GetEnumerator()); Assert.IsTrue(merged.MoveNext()); Assert.AreEqual(t1, merged.Current); Assert.IsTrue(merged.MoveNext()); @@ -128,7 +128,7 @@ namespace Lucene.Net.Index } IEnumerator<Term> expected = superSet.GetEnumerator(); - IEnumerator<Term> actual = new MergedIterator<Term>(subs.ToArray()); + IEnumerator<Term> actual = new MergedEnumerator<Term>(subs.ToArray()); while (actual.MoveNext()) { Assert.IsTrue(expected.MoveNext()); diff --git a/src/Lucene.Net.Tests/Util/TestMergedIterator.cs b/src/Lucene.Net.Tests/Util/TestMergedIterator.cs index 1e38f8f..c65e99d 100644 --- a/src/Lucene.Net.Tests/Util/TestMergedIterator.cs +++ b/src/Lucene.Net.Tests/Util/TestMergedIterator.cs @@ -31,10 +31,10 @@ namespace Lucene.Net.Util [Test] public virtual void TestMergeEmpty() { - IEnumerator<int> merged = new MergedIterator<int>(); + IEnumerator<int> merged = new MergedEnumerator<int>(); Assert.IsFalse(merged.MoveNext()); - merged = new MergedIterator<int>((new List<int>()).GetEnumerator()); + merged = new MergedEnumerator<int>((new List<int>()).GetEnumerator()); Assert.IsFalse(merged.MoveNext()); IEnumerator<int>[] itrs = new IEnumerator<int>[Random.Next(100)]; @@ -42,7 +42,7 @@ namespace Lucene.Net.Util { itrs[i] = (new List<int>()).GetEnumerator(); } - merged = new MergedIterator<int>(itrs); + merged = new MergedEnumerator<int>(itrs); Assert.IsFalse(merged.MoveNext()); } @@ -161,6 +161,175 @@ namespace Lucene.Net.Util { itrs[i] = lists[i].GetEnumerator(); } + try + { + MergedEnumerator<int> mergedItr = new MergedEnumerator<int>(removeDups, itrs); + IEnumerator<int?> expectedItr = expected.GetEnumerator(); + while (expectedItr.MoveNext()) + { + Assert.IsTrue(mergedItr.MoveNext()); + Assert.AreEqual(expectedItr.Current, mergedItr.Current); + } + Assert.IsFalse(mergedItr.MoveNext()); + } + finally + { + IOUtils.Dispose(itrs); + } + } + + + + + + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestMergeEmptyIterator() + { + IEnumerator<int> merged = new MergedIterator<int>(); + Assert.IsFalse(merged.MoveNext()); + + merged = new MergedIterator<int>((new List<int>()).GetEnumerator()); + Assert.IsFalse(merged.MoveNext()); + + IEnumerator<int>[] itrs = new IEnumerator<int>[Random.Next(100)]; + for (int i = 0; i < itrs.Length; i++) + { + itrs[i] = (new List<int>()).GetEnumerator(); + } + merged = new MergedIterator<int>(itrs); + Assert.IsFalse(merged.MoveNext()); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestNoDupsRemoveDupsIterator() + { + TestCaseIterator(1, 1, true); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestOffItrDupsRemoveDupsIterator() + { + TestCaseIterator(3, 1, true); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestOnItrDupsRemoveDupsIterator() + { + TestCaseIterator(1, 3, true); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestOnItrRandomDupsRemoveDupsIterator() + { + TestCaseIterator(1, -3, true); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestBothDupsRemoveDupsIterator() + { + TestCaseIterator(3, 3, true); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestBothDupsWithRandomDupsRemoveDupsIterator() + { + TestCaseIterator(3, -3, true); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestNoDupsKeepDupsIterator() + { + TestCaseIterator(1, 1, false); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestOffItrDupsKeepDupsIterator() + { + TestCaseIterator(3, 1, false); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestOnItrDupsKeepDupsIterator() + { + TestCaseIterator(1, 3, false); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestOnItrRandomDupsKeepDupsIterator() + { + TestCaseIterator(1, -3, false); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestBothDupsKeepDupsIterator() + { + TestCaseIterator(3, 3, false); + } + + [Test] + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + public virtual void TestBothDupsWithRandomDupsKeepDupsIterator() + { + TestCaseIterator(3, -3, false); + } + + [Obsolete("This method will be removed in 4.8.0 release candidate.")] + private void TestCaseIterator(int itrsWithVal, int specifiedValsOnItr, bool removeDups) + { + // Build a random number of lists + IList<int?> expected = new List<int?>(); + Random random = new Random(Random.Next()); + int numLists = itrsWithVal + random.Next(1000 - itrsWithVal); + IList<int>[] lists = new IList<int>[numLists]; + for (int i = 0; i < numLists; i++) + { + lists[i] = new List<int>(); + } + int start = random.Next(1000000); + int end = start + VALS_TO_MERGE / itrsWithVal / Math.Abs(specifiedValsOnItr); + for (int i = start; i < end; i++) + { + int maxList = lists.Length; + int maxValsOnItr = 0; + int sumValsOnItr = 0; + for (int itrWithVal = 0; itrWithVal < itrsWithVal; itrWithVal++) + { + int list = random.Next(maxList); + int valsOnItr = specifiedValsOnItr < 0 ? (1 + random.Next(-specifiedValsOnItr)) : specifiedValsOnItr; + maxValsOnItr = Math.Max(maxValsOnItr, valsOnItr); + sumValsOnItr += valsOnItr; + for (int valOnItr = 0; valOnItr < valsOnItr; valOnItr++) + { + lists[list].Add(i); + } + maxList = maxList - 1; + ArrayUtil.Swap(lists, list, maxList); + } + int maxCount = removeDups ? maxValsOnItr : sumValsOnItr; + for (int count = 0; count < maxCount; count++) + { + expected.Add(i); + } + } + // Now check that they get merged cleanly + IEnumerator<int>[] itrs = new IEnumerator<int>[numLists]; + for (int i = 0; i < numLists; i++) + { + itrs[i] = lists[i].GetEnumerator(); + } MergedIterator<int> mergedItr = new MergedIterator<int>(removeDups, itrs); IEnumerator<int?> expectedItr = expected.GetEnumerator(); diff --git a/src/Lucene.Net/Index/CoalescedUpdates.cs b/src/Lucene.Net/Index/CoalescedUpdates.cs index 9833969..8e536db 100644 --- a/src/Lucene.Net/Index/CoalescedUpdates.cs +++ b/src/Lucene.Net/Index/CoalescedUpdates.cs @@ -87,7 +87,7 @@ namespace Lucene.Net.Index { subs[i] = outerInstance.iterables[i].GetEnumerator(); } - return new MergedIterator<Term>(subs); + return new MergedEnumerator<Term>(subs); } IEnumerator IEnumerable.GetEnumerator() diff --git a/src/Lucene.Net/Index/MultiFields.cs b/src/Lucene.Net/Index/MultiFields.cs index 0825d73..11ff79c 100644 --- a/src/Lucene.Net/Index/MultiFields.cs +++ b/src/Lucene.Net/Index/MultiFields.cs @@ -249,7 +249,7 @@ namespace Lucene.Net.Index { subIterators[i] = subs[i].GetEnumerator(); } - return new MergedIterator<string>(subIterators); + return new MergedEnumerator<string>(subIterators); } public override Terms GetTerms(string field) diff --git a/src/Lucene.Net/Util/MergedIterator.cs b/src/Lucene.Net/Util/MergedIterator.cs index f5530bc..0ef0e42 100644 --- a/src/Lucene.Net/Util/MergedIterator.cs +++ b/src/Lucene.Net/Util/MergedIterator.cs @@ -44,11 +44,178 @@ namespace Lucene.Net.Util /// </list> /// <para/> /// The caller is responsible for disposing the <see cref="IEnumerator{T}"/> instances that are passed + /// into the constructor, <see cref="MergedEnumerator{T}"/> doesn't do it automatically. + /// <para/> + /// @lucene.internal + /// </summary> + public sealed class MergedEnumerator<T> : IEnumerator<T> + where T : IComparable<T> + { + private readonly TermMergeQueue<T> queue; + private readonly SubEnumerator<T>[] top; + private readonly bool removeDuplicates; + private int numTop; + private T current; + + public MergedEnumerator(params IEnumerator<T>[] enumerators) : this(true, enumerators) { } + + public MergedEnumerator(bool removeDuplicates, params IEnumerator<T>[] enumerators) : this(removeDuplicates, (IList<IEnumerator<T>>)enumerators) { } + + public MergedEnumerator(IList<IEnumerator<T>> enumerators) : this(true, enumerators) { } // LUCENENET specific - added overload for better compatibity + + public MergedEnumerator(bool removeDuplicates, IList<IEnumerator<T>> enumerators) // LUCENENET specific - added overload for better compatibity + { + if (enumerators is null) + throw new ArgumentNullException(nameof(enumerators)); // LUCENENET specific - added guard clause + + this.removeDuplicates = removeDuplicates; + queue = new TermMergeQueue<T>(enumerators.Count); + top = new SubEnumerator<T>[enumerators.Count]; + int index = 0; + foreach (IEnumerator<T> iter in enumerators) + { + // If hasNext + if (iter.MoveNext()) + { + queue.Add(new SubEnumerator<T> + { + Current = iter.Current, + Enumerator = iter, + Index = index++ + }); + } + } + } + + public bool MoveNext() + { + PushTop(); + + if (queue.Count > 0) + { + PullTop(); + } + else + { + return false; + } + + return true; + } + + public T Current => current; + + object System.Collections.IEnumerator.Current => Current; + + public void Reset() + { + throw new NotSupportedException(); + } + + public void Dispose() + { + } + + private void PullTop() + { + if (Debugging.AssertsEnabled) Debugging.Assert(numTop == 0); + top[numTop++] = queue.Pop(); + if (removeDuplicates) + { + //extract all subs from the queue that have the same top element + while (queue.Count != 0 && queue.Top.Current.Equals(top[0].Current)) + { + top[numTop++] = queue.Pop(); + } + } + current = top[0].Current; + } + + private void PushTop() + { + for (int i = 0; i < numTop; ++i) + { + if (top[i].Enumerator.MoveNext()) + { + top[i].Current = top[i].Enumerator.Current; + queue.Add(top[i]); + } + else + { + top[i].Current = default(T); + } + } + numTop = 0; + } + + private class SubEnumerator<I> + where I : IComparable<I> + { + internal IEnumerator<I> Enumerator { get; set; } + internal I Current { get; set; } + internal int Index { get; set; } + } + + private class TermMergeQueue<C> : PriorityQueue<SubEnumerator<C>> + where C : IComparable<C> + { + internal TermMergeQueue(int size) + : base(size) + { + } + + protected internal override bool LessThan(SubEnumerator<C> a, SubEnumerator<C> b) + { + int cmp; + // LUCNENENET specific: For strings, we need to ensure we compare them ordinal + if (typeof(C).Equals(typeof(string))) + { + cmp = (a.Current as string).CompareToOrdinal(b.Current as string); + } + else + { + cmp = a.Current.CompareTo(b.Current); + } + if (cmp != 0) + { + return cmp < 0; + } + else + { + return a.Index < b.Index; + } + } + } + } + + /// <summary> + /// Provides a merged sorted view from several sorted iterators. + /// <para/> + /// If built with <see cref="removeDuplicates"/> set to <c>true</c> and an element + /// appears in multiple iterators then it is deduplicated, that is this iterator + /// returns the sorted union of elements. + /// <para/> + /// If built with <see cref="removeDuplicates"/> set to <c>false</c> then all elements + /// in all iterators are returned. + /// <para/> + /// Caveats: + /// <list type="bullet"> + /// <item><description>The behavior is undefined if the iterators are not actually sorted.</description></item> + /// <item><description>Null elements are unsupported.</description></item> + /// <item><description>If <see cref="removeDuplicates"/> is set to <c>true</c> and if a single iterator contains + /// duplicates then they will not be deduplicated.</description></item> + /// <item><description>When elements are deduplicated it is not defined which one is returned.</description></item> + /// <item><description>If <see cref="removeDuplicates"/> is set to <c>false</c> then the order in which duplicates + /// are returned isn't defined.</description></item> + /// </list> + /// <para/> + /// The caller is responsible for disposing the <see cref="IEnumerator{T}"/> instances that are passed /// into the constructor, <see cref="MergedIterator{T}"/> doesn't do it automatically. /// <para/> /// @lucene.internal /// </summary> - public sealed class MergedIterator<T> : IEnumerator<T> // LUCENENET TODO: API - rename MergedEnumerator + [Obsolete("Use MergedEnumerator instead. This class will be removed in 4.8.0 release candidate."), System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] + public sealed class MergedIterator<T> : IEnumerator<T> where T : IComparable<T> { private readonly TermMergeQueue<T> queue;
