jpountz commented on a change in pull request #966: LUCENE-9024 Optimize IntroSelector to use median of medians URL: https://github.com/apache/lucene-solr/pull/966#discussion_r338920942
########## File path: lucene/core/src/java/org/apache/lucene/util/IntroSelector.java ########## @@ -33,26 +32,87 @@ public final void select(int from, int to, int k) { quickSelect(from, to, k, maxDepth); } - // heap sort - // TODO: use median of median instead to have linear worst-case rather than - // n*log(n) - void slowSelect(int from, int to, int k) { - new Sorter() { + int slowSelect(int from, int to, int k) { + return medianOfMediansSelect(from, to-1, k); + } + + int medianOfMediansSelect(int from, int to, int k) { + int pivotIndex; + do { + if (from == to) { + return from; + } + pivotIndex = pivot(from, to); + setPivot(pivotIndex); + pivotIndex = partition(from, to, k, pivotIndex); + setPivot(pivotIndex); + if (k == pivotIndex) { + return k; + } else if (k < pivotIndex) { + to = pivotIndex-1; + } else { + from = pivotIndex+1; + } + } while (from != to); + setPivot(pivotIndex); + return pivotIndex; + } - @Override - protected void swap(int i, int j) { - IntroSelector.this.swap(i, j); + private int partition(int left, int right, int n, int pivotIndex) { + swap(pivotIndex, right); + int storeIndex = left; + for (int i = left; i < right; i++) { + if (comparePivot(i) > 0) { + swap(storeIndex, i); + storeIndex++; + } + } + int storeIndexEq = storeIndex; + for (int i = storeIndex; i < right; i++) { + if (comparePivot(i) == 0) { + swap(storeIndexEq, i); + storeIndexEq++; } + } + swap(right, storeIndexEq); + if (n < storeIndex) { + return storeIndex; + } else if (n <= storeIndexEq) { + return n; + } + return storeIndexEq; + } + + private int pivot(int left, int right) { + if (right - left < 5) { + int pivotIndex = partition5(left, right); + return pivotIndex; + } - @Override - protected int compare(int i, int j) { - return IntroSelector.this.compare(i, j); + for (int i = left; i <= right; i=i+5) { + int subRight = i + 4; + if (subRight > right) { + subRight = right; } + int median5 = partition5(i, subRight); + swap(median5, left + ((i-left)/5)); + } + int mid = ((right - left) / 10) + left + 1; + int to = left + ((right - left)/5); + return medianOfMediansSelect(left, to, mid); + } - public void sort(int from, int to) { - heapSort(from, to); + private int partition5(int left, int right) { Review comment: Is this a bubble sort? If so it might make things clearer to say it in a comment, and maybe also be explicit that it doesn't hurt the runtime complexity since we only apply it to small arrays? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org For additional commands, e-mail: issues-h...@lucene.apache.org