Good day, everyone.
I've been playing a bit with unlogged tables - just random updates on a
simple
key-value table. I've noticed a fair amount of CPU spent in compactify_tuples
(called by PageRepairFragmentation). Most of the time was spent in the qsort
of itemidbase items.
The itemidbase array is bounded by the number of tuples in a page, and the
itemIdSortData
structure is simple, so a specialized sort could be a better choice.
The attached patch adds a combination of one pass of prefix sort with
insertion
sort for larger arrays and shell sort for smaller arrays.
Insertion sort and shell sort are implemented as macros and can be
reused.
I've tested following table:
create unlogged table test3 (
id integer PRIMARY KEY with (fillfactor=85),
val text
) WITH (fillfactor=85);
insert into test3 select i, '!'||i from generate_series(1, 10000000)
as i;
With pgbench script:
\set id1 RANDOM(1, :scale)
\set id2 RANDOM(1, :scale)
select * from test3 where id = :id1;
update test3 set val = '!'|| :id2 where id = :id1;
And command:
pgbench -M prepared -c 3 -s 10000000 -T 1000 -P 3 -n -f test3.sql
testdb
Using 1GB shared_buffers and synchronous_commit=off.
On my notebook improvement is:
before patch:
progress: 63.0 s, 15880.1 tps, lat 0.189 ms stddev 0.127
progress: 66.0 s, 15975.8 tps, lat 0.188 ms stddev 0.122
progress: 69.0 s, 15904.1 tps, lat 0.189 ms stddev 0.152
progress: 72.0 s, 15000.9 tps, lat 0.200 ms stddev 0.213
progress: 75.0 s, 15101.7 tps, lat 0.199 ms stddev 0.192
progress: 78.0 s, 15854.2 tps, lat 0.189 ms stddev 0.158
progress: 81.0 s, 15803.3 tps, lat 0.190 ms stddev 0.158
progress: 84.0 s, 15242.9 tps, lat 0.197 ms stddev 0.203
progress: 87.0 s, 15184.1 tps, lat 0.198 ms stddev 0.215
after patch:
progress: 63.0 s, 17108.5 tps, lat 0.175 ms stddev 0.140
progress: 66.0 s, 17271.9 tps, lat 0.174 ms stddev 0.155
progress: 69.0 s, 17243.5 tps, lat 0.174 ms stddev 0.143
progress: 72.0 s, 16675.3 tps, lat 0.180 ms stddev 0.206
progress: 75.0 s, 17187.4 tps, lat 0.175 ms stddev 0.157
progress: 78.0 s, 17293.0 tps, lat 0.173 ms stddev 0.159
progress: 81.0 s, 16289.8 tps, lat 0.184 ms stddev 0.180
progress: 84.0 s, 16131.2 tps, lat 0.186 ms stddev 0.170
progress: 87.0 s, 16741.1 tps, lat 0.179 ms stddev 0.165
I understand that it is quite a degenerate test case,
but this improvement probably still makes sense.
With regards,
--
Sokolov Yura aka funny.falcon
Postgres Professional: https://postgrespro.ru
The Russian Postgres Company
From f8ed235dbb60a79b4f59dc8a4af014b2ca698772 Mon Sep 17 00:00:00 2001
From: Sokolov Yura aka funny_falcon <funny.fal...@gmail.com>
Date: Sun, 14 May 2017 20:57:00 +0300
Subject: [PATCH] storage/page/bufpage.c: improve compactify_tuples
Items passed to compactify_tuples are almost sorted. So the call to the
general-purpose qsort incurs unnecessary overhead from function calls (swapfunc,
med3, itemoffcompare).
This patch adds one pass of prefix sort + insertion sort for large arrays
of items, and shell sort for smaller arrays.
Insertion and shell sort are implemented using macros.
This approach saves about 3% of CPU in a degenerate case
(highly intensive HOT random updates on an unlogged table with
synchronous_commit=off).
---
src/backend/storage/page/bufpage.c | 42 +++++++++++++++++++++++++--
src/include/c.h | 59 ++++++++++++++++++++++++++++++++++++++
2 files changed, 98 insertions(+), 3 deletions(-)
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index fdf045a..b7c6392 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -433,6 +433,36 @@ itemoffcompare(const void *itemidp1, const void *itemidp2)
((itemIdSort) itemidp1)->itemoff;
}
+#define NSPLIT 256
+#define PREFDIV (BLCKSZ / NSPLIT)
+/* One pass of prefix sort on the high 8 bits of itemoff. */
+static void
+prefix_presort(itemIdSort itemidbase, int nitems)
+{
+ itemIdSortData copy[MaxIndexTuplesPerPage];
+ int count[NSPLIT+1] = { 0 };
+ int i, pos, highbits;
+
+ Assert(nitems <= MaxIndexTuplesPerPage);
+ for (i = 0; i < nitems; i++)
+ {
+ highbits = itemidbase[i].itemoff / PREFDIV;
+ count[highbits]++;
+ }
+ /* sort in decreasing order */
+ for (i = NSPLIT-1; i != 0; i--)
+ count[i-1] += count[i];
+ Assert(count[0] == nitems);
+ for (i = 0; i < nitems; i++)
+ {
+ highbits = itemidbase[i].itemoff / PREFDIV;
+ pos = count[highbits+1]++;
+ copy[pos] = itemidbase[i];
+ }
+ Assert(count[1] == nitems);
+ memcpy(itemidbase, copy, sizeof(itemIdSortData) * nitems);
+}
+
/*
* After removing or marking some line pointers unused, move the tuples to
* remove the gaps caused by the removed items.
@@ -444,9 +474,15 @@ compactify_tuples(itemIdSort itemidbase, int nitems, Page page)
Offset upper;
int i;
- /* sort itemIdSortData array into decreasing itemoff order */
- qsort((char *) itemidbase, nitems, sizeof(itemIdSortData),
- itemoffcompare);
+ if (nitems > 48)
+ {
+ prefix_presort(itemidbase, nitems);
+ pg_insertion_sort(itemIdSortData, itemidbase, nitems, itemoffcompare);
+ }
+ else
+ {
+ pg_shell_sort(itemIdSortData, itemidbase, nitems, itemoffcompare);
+ }
upper = phdr->pd_special;
for (i = 0; i < nitems; i++)
diff --git a/src/include/c.h b/src/include/c.h
index fba07c6..837940d 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -962,6 +962,65 @@ typedef NameData *Name;
#define unlikely(x) ((x) != 0)
#endif
+/*
+ * pg_shell_sort - sort for small arrays with an inlinable comparator.
+ * Since it is implemented as a macro, it can be optimized together with the
+ * comparison function.
+ * Gaps are "gap(i) = largest prime number below e^i". They are close to the
+ * Incerpi & Sedgewick gaps, but appear to perform better on average.
+ * It is best used with arrays smaller than 200 elements, and can be safely
+ * used up to 1000 elements. But it degrades quickly after that, and it is
+ * better to use qsort.
+ *
+ * Arguments:
+ * elem_t - type of array element (for declaring temporary variables)
+ * array - pointer to elements to be sorted
+ * nitems - number of elements to be sorted
+ * cmp - comparison function that accepts addresses of elements
+ * (it is compatible with a qsort comparison function).
+ * Note: the `cmp` argument is evaluated many times.
+ * The other arguments are evaluated only once.
+ */
+#define pg_shell_sort_pass(elem_t, cmp, off) \
+ do { \
+ int _i, _j; \
+ elem_t _temp; \
+ for (_i = off; _i < _n; _i += off) \
+ { \
+ if ((cmp)(_arr+_i, _arr+_i-off) >= 0) \
+ continue; \
+ _temp = _arr[_i]; \
+ _j = _i; \
+ do { \
+ _arr[_j] = _arr[_j - off]; \
+ _j -= off; \
+ } while (_j >= off && (cmp)(_arr+_j-off, &_temp) > 0); \
+ _arr[_j] = _temp; \
+ } \
+ } while (0)
+#define pg_shell_sort(elem_t, array, nitems, cmp) \
+ do { \
+ int _n = (nitems); \
+ elem_t* _arr = (array); \
+ static const int _offsets[] = {401, 139, 53, 19, 7, 3}; \
+ int _noff, _off; \
+ for (_noff = 0; _noff < lengthof(_offsets); _noff++) { \
+ _off = _offsets[_noff]; \
+ pg_shell_sort_pass(elem_t, (cmp), _off); \
+ } \
+ pg_shell_sort_pass(elem_t, (cmp), 1); \
+ } while(0)
+/*
+ * pg_insertion_sort - plain insertion sort for the smallest arrays, or when
+ * the array is already almost sorted. Takes the same arguments as pg_shell_sort.
+ */
+#define pg_insertion_sort(elem_t, array, nitems, cmp) \
+ do { \
+ int _n = (nitems); \
+ elem_t* _arr = (array); \
+ pg_shell_sort_pass(elem_t, (cmp), 1); \
+ } while (0)
+
/* ----------------------------------------------------------------
* Section 8: random stuff
--
2.9.3
--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers