bruno-roustant commented on a change in pull request #404:
URL: https://github.com/apache/lucene/pull/404#discussion_r739837745
##########
File path: lucene/core/src/java/org/apache/lucene/util/IntroSorter.java
##########
@@ -20,66 +20,120 @@
* {@link Sorter} implementation based on a variant of the quicksort algorithm
called <a
* href="http://en.wikipedia.org/wiki/Introsort">introsort</a>: when the
recursion level exceeds the
* log of the length of the array to sort, it falls back to heapsort. This
prevents quicksort from
- * running into its worst-case quadratic runtime. Small arrays are sorted with
insertion sort.
+ * running into its worst-case quadratic runtime. Small ranges are sorted with
insertion sort.
*
* @lucene.internal
*/
public abstract class IntroSorter extends Sorter {
+ /** Below this size threshold, the partition selection is simplified to a
single median. */
+ private static final int SINGLE_MEDIAN_THRESHOLD = 40;
+
/** Create a new {@link IntroSorter}. */
public IntroSorter() {}
@Override
public final void sort(int from, int to) {
checkRange(from, to);
- quicksort(from, to, 2 * MathUtil.log(to - from, 2));
+ sort(from, to, 2 * MathUtil.log(to - from, 2));
}
- void quicksort(int from, int to, int maxDepth) {
- if (to - from < BINARY_SORT_THRESHOLD) {
- binarySort(from, to);
- return;
- } else if (--maxDepth < 0) {
- heapSort(from, to);
- return;
- }
-
- final int mid = (from + to) >>> 1;
-
- if (compare(from, mid) > 0) {
- swap(from, mid);
- }
-
- if (compare(mid, to - 1) > 0) {
- swap(mid, to - 1);
- if (compare(from, mid) > 0) {
- swap(from, mid);
+ /**
+ * Sorts between from (inclusive) and to (exclusive) with intro sort.
+ *
+ * <p>Sorts small ranges with insertion sort. Fallbacks to heap sort to
avoid quadratic worst
+ * case. Selects the pivot with medians and partitions with the
Bentley-McIlroy fast 3-ways
+ * algorithm (Engineering a Sort Function, Bentley-McIlroy).
+ */
+ void sort(int from, int to, int maxDepth) {
+ int size;
+
+ // Sort small ranges with insertion sort.
+ while ((size = to - from) > INSERTION_SORT_THRESHOLD) {
+
+ if (--maxDepth < 0) {
+ // Max recursion depth reached: fallback to heap sort.
+ heapSort(from, to);
+ return;
}
- }
-
- int left = from + 1;
- int right = to - 2;
- setPivot(mid);
- for (; ; ) {
- while (comparePivot(right) < 0) {
- --right;
+ // Pivot selection based on medians.
+ int last = to - 1;
+ int mid = (from + last) >>> 1;
+ int range = size >> 3;
+ int pivot;
+ if (size <= SINGLE_MEDIAN_THRESHOLD) {
+ // Select the pivot with a single median around the middle element.
+ // Do not take the median between [from, mid, last] because it hurts
performance
+ // if the order is descending.
+ pivot = median(mid - range, mid, mid + range);
+ } else {
+ // Select the pivot with the median of medians.
+ int doubleRange = range << 1;
+ int medianFirst = median(from, from + range, from + doubleRange);
+ int medianMiddle = median(mid - range, mid, mid + range);
+ int medianLast = median(last - doubleRange, last - range, last);
+ pivot = median(medianFirst, medianMiddle, medianLast);
}
- while (left < right && comparePivot(left) >= 0) {
- ++left;
+ // Bentley-McIlroy 3-way partitioning.
+ setPivot(pivot);
+ swap(from, pivot);
+ int i = from;
+ int j = to;
+ int p = from + 1;
+ int q = last;
+ while (true) {
+ int leftCmp, rightCmp;
+ while ((leftCmp = comparePivot(++i)) > 0) {}
+ while ((rightCmp = comparePivot(--j)) < 0) {}
+ if (i >= j) {
+ if (i == j && rightCmp == 0) {
+ swap(i, p);
+ }
+ break;
+ }
+ swap(i, j);
+ if (rightCmp == 0) {
+ swap(i, p++);
+ }
+ if (leftCmp == 0) {
+ swap(j, q--);
+ }
+ }
+ i = j + 1;
+ for (int k = from; k < p; ) {
+ swap(k++, j--);
+ }
+ for (int k = last; k > q; ) {
+ swap(k--, i++);
}
- if (left < right) {
- swap(left, right);
- --right;
+ // Recursion on the smallest partition. Replace the tail recursion by a
loop.
Review comment:
No, maybe I'm not clear. I replaced the two recursive calls on both left
and right parts, by a recursive call on the smallest part and a loop on the
largest part.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]