The attached patch replaces the existing siftup method for heapify with
a siftdown method. Tested with random integers it does 18% fewer
compares and takes 10% less time for the heapify, over the work_mem
range 1024 to 1048576.
Both algorithms appear to be O(n) (contradicting Wikipedia's claim
that a siftup heapify is O(n log n)).
--
Cheers,
Jeremy
diff --git a/src/backend/utils/sort/tuplesort.c
b/src/backend/utils/sort/tuplesort.c
index 8b520c1..2ea7b2d 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -1211,7 +1211,8 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple)
(void) grow_memtuples(state);
Assert(state->memtupcount < state->memtupsize);
}
- state->memtuples[state->memtupcount++] = *tuple;
+ state->memtuples[state->memtupcount] = *tuple;
+ state->memtuples[state->memtupcount++].tupindex = 0;
/*
* Check if it's time to switch over to a bounded
heapsort. We do
@@ -1884,23 +1885,47 @@ inittapes(Tuplesortstate *state)
state->tp_tapenum = (int *) palloc0(maxTapes * sizeof(int));
/*
- * Convert the unsorted contents of memtuples[] into a heap. Each tuple
is
- * marked as belonging to run number zero.
- *
- * NOTE: we pass false for checkIndex since there's no point in
comparing
- * indexes in this step, even though we do intend the indexes to be part
- * of the sort key...
+ * Convert the unsorted contents of memtuples[] into a heap. Each tuple
was
+ * marked as belonging to run number zero on input; we don't compare
that.
*/
ntuples = state->memtupcount;
- state->memtupcount = 0; /* make the heap empty */
- for (j = 0; j < ntuples; j++)
{
- /* Must copy source tuple to avoid possible overwrite */
- SortTuple stup = state->memtuples[j];
+ SortTuple * m = state->memtuples;
- tuplesort_heap_insert(state, &stup, 0, false);
+ for (j = (ntuples-2)/2; /* comb the array from the last heap
parent */
+ j >= 0; /* to the start */
+ j--)
+ {
+ int root = j;
+ int child, swap = 0;
+ SortTuple ptup = m[root];
+
+ while ((child = root*2 + 1) <= ntuples-1)
+ { /* root has at least one child. Check
left-child */
+ if (COMPARETUP(state, &ptup, &m[child]) < 0)
+ {
/* [root] < [lchild] */
+ if ( ++child <= ntuples-1
/* check right-child */
+ && COMPARETUP(state, &ptup,
&m[child]) > 0)
+ swap = child;
+ else
+ break;
/* [root] smallest */
+ }
+ else
+ {
+ swap = child;
+ if ( ++child <= ntuples-1
/* check right-child */
+ && COMPARETUP(state, &m[swap],
&m[child]) > 0)
+ swap = child;
+ }
+ /* [swap] is smallest of three; move into the
parent slot */
+ m[root] = m[swap];
+ root = swap; /* and repeat
with the child subtree */
+ }
+ if (swap)
+ m[swap] = ptup;
+ /* This and all heap nodes after are now
well-positioned */
+ }
}
- Assert(state->memtupcount == ntuples);
state->currentRun = 0;
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers