Sokolov Yura wrote 2017-05-15 18:23:
Alvaro Herrera wrote 2017-05-15 18:04:
Please add these two patches to the upcoming commitfest,
https://commitfest.postgresql.org/
Thank you for the suggestion.
I've created https://commitfest.postgresql.org/14/1138/
As far as I understand, I should attach both patches to a single email
for them to show up correctly in the commitfest entry, so I'm doing that
with this email. Please correct me if I'm doing something wrong.
With regards.
Looks like it should be a single file.
--
Sokolov Yura aka funny.falcon
Postgres Professional: https://postgrespro.ru
The Russian Postgres Company
From 231023298aad3024e31b487c8f5d2a6e68ad1da9 Mon Sep 17 00:00:00 2001
From: Sokolov Yura <funny.fal...@postgrespro.ru>
Date: Mon, 15 May 2017 14:23:39 +0300
Subject: [PATCH 1/2] Improve compactify_tuples
Items passed to compactify_tuples are almost sorted, so calling the
general-purpose qsort incurs unnecessary function-call overhead
(swapfunc, med3, itemoffcompare).
This patch implements a bucket sort:
- one pass of stable prefix sort on the high 8 bits of the offset,
- and insertion sort for buckets larger than 1 element.
For smaller arrays, Shell sort is used instead.
The insertion and Shell sorts are implemented as macros.
This approach saves about 3% of CPU in a degenerate case (highly
intensive HOT random updates on an unlogged table with
synchronous_commit=off), and speeds up WAL replay (as was found by
Heikki Linnakangas).
A similar approach was implemented by Heikki Linnakangas some time ago,
with several differences.
---
src/backend/storage/page/bufpage.c | 78 ++++++++++++++++++++++++++++++++++++--
src/include/c.h | 59 ++++++++++++++++++++++++++++
2 files changed, 134 insertions(+), 3 deletions(-)
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index fdf045a45b..c5630e808b 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -434,6 +434,77 @@ itemoffcompare(const void *itemidp1, const void *itemidp2)
}
/*
+ * Sort an array of itemIdSort's on itemoff, descending.
+ *
+ * It uses Shell sort. Given that the array is small and itemoffcompare
+ * is inlined, it is much faster than a call to qsort.
+ */
+static void
+sort_itemIds_small(itemIdSort itemidbase, int nitems)
+{
+ pg_shell_sort(itemIdSortData, itemidbase, nitems, itemoffcompare);
+}
+
+/*
+ * Sort an array of itemIdSort's on itemoff, descending.
+ *
+ * It uses a bucket sort:
+ * - a single pass of stable prefix sort on the high 8 bits,
+ * - and insertion sort on buckets larger than 1 element.
+ */
+static void
+sort_itemIds(itemIdSort itemidbase, int nitems)
+{
+ itemIdSortData copy[Max(MaxIndexTuplesPerPage, MaxHeapTuplesPerPage)];
+#define NSPLIT 256
+#define PREFDIV (BLCKSZ / NSPLIT)
+ /* two extra elements, because each pass shifts bucket boundaries up one slot */
+ uint16 count[NSPLIT + 2] = {0};
+ int i,
+ pos,
+ highbits;
+
+ Assert(nitems <= MaxIndexTuplesPerPage);
+ for (i = 0; i < nitems; i++)
+ {
+ highbits = itemidbase[i].itemoff / PREFDIV;
+ count[highbits]++;
+ }
+ /* suffix sums, so buckets are laid out in decreasing offset order */
+ for (i = NSPLIT - 1; i != 0; i--)
+ count[i - 1] += count[i];
+
+ /*
+ * count[k+1] is start of bucket k, count[k] is end of bucket k, and
+ * count[k] - count[k+1] is length of bucket k.
+ */
+ Assert(count[0] == nitems);
+ for (i = 0; i < nitems; i++)
+ {
+ highbits = itemidbase[i].itemoff / PREFDIV;
+ pos = count[highbits + 1];
+ count[highbits + 1]++;
+ copy[pos] = itemidbase[i];
+ }
+ Assert(count[1] == nitems);
+
+ /*
+ * count[k+2] is start of bucket k, count[k+1] is end of bucket k, and
+ * count[k+1]-count[k+2] is length of bucket k.
+ */
+ for (i = NSPLIT; i > 0; i--)
+ {
+ if (likely(count[i] - count[i + 1] <= 1))
+ continue;
+ pg_insertion_sort(itemIdSortData,
+ copy + count[i + 1],
+ count[i] - count[i + 1],
+ itemoffcompare);
+ }
+ memcpy(itemidbase, copy, sizeof(itemIdSortData) * nitems);
+}
+
+/*
* After removing or marking some line pointers unused, move the tuples to
* remove the gaps caused by the removed items.
*/
@@ -444,9 +515,10 @@ compactify_tuples(itemIdSort itemidbase, int nitems, Page page)
Offset upper;
int i;
- /* sort itemIdSortData array into decreasing itemoff order */
- qsort((char *) itemidbase, nitems, sizeof(itemIdSortData),
- itemoffcompare);
+ if (nitems > 48)
+ sort_itemIds(itemidbase, nitems);
+ else
+ sort_itemIds_small(itemidbase, nitems);
upper = phdr->pd_special;
for (i = 0; i < nitems; i++)
diff --git a/src/include/c.h b/src/include/c.h
index fba07c651f..837940d5cf 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -962,6 +962,65 @@ typedef NameData *Name;
#define unlikely(x) ((x) != 0)
#endif
+/*
+ * pg_shell_sort - sort for small arrays with an inlinable comparator.
+ * Since it is implemented as a macro, it can be optimized together with
+ * the comparison function.
+ * Gaps are approximately "gap(i) = largest prime below e^i". They are
+ * close to the Incerpi & Sedgewick gaps, but appear to be better on average.
+ * It is best used with arrays smaller than 200 elements, and can safely
+ * be used up to 1000 elements; it degrades quickly after that, and it
+ * is better to use qsort.
+ *
+ * Arguments:
+ * elem_t - type of array element (for declaring temporary variables)
+ * array - pointer to the elements to be sorted
+ * nitems - number of elements to be sorted
+ * cmp - comparison function taking addresses of elements
+ * (compatible with a qsort comparison function)
+ * Note: the `cmp` argument is evaluated many times;
+ * the other arguments are evaluated once.
+ */
+#define pg_shell_sort_pass(elem_t, cmp, off) \
+ do { \
+ int _i, _j; \
+ elem_t _temp; \
+ for (_i = off; _i < _n; _i += off) \
+ { \
+ if ((cmp)(_arr+_i, _arr+_i-off) >= 0) \
+ continue; \
+ _temp = _arr[_i]; \
+ _j = _i; \
+ do { \
+ _arr[_j] = _arr[_j - off]; \
+ _j -= off; \
+ } while (_j >= off && (cmp)(_arr+_j-off, &_temp) > 0); \
+ _arr[_j] = _temp; \
+ } \
+ } while (0)
+#define pg_shell_sort(elem_t, array, nitems, cmp) \
+ do { \
+ int _n = (nitems); \
+ elem_t* _arr = (array); \
+ static const int _offsets[] = {401, 139, 53, 19, 7, 3}; \
+ int _noff, _off; \
+ for (_noff = 0; _noff < lengthof(_offsets); _noff++) { \
+ _off = _offsets[_noff]; \
+ pg_shell_sort_pass(elem_t, (cmp), _off); \
+ } \
+ pg_shell_sort_pass(elem_t, (cmp), 1); \
+ } while (0)
+/*
+ * pg_insertion_sort - plain insertion sort, for the smallest arrays or
+ * arrays that are almost sorted already. Takes the same arguments as pg_shell_sort.
+ */
+#define pg_insertion_sort(elem_t, array, nitems, cmp) \
+ do { \
+ int _n = (nitems); \
+ elem_t* _arr = (array); \
+ pg_shell_sort_pass(elem_t, (cmp), 1); \
+ } while (0)
+
/* ----------------------------------------------------------------
* Section 8: random stuff
--
2.11.0
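For anyone who wants to experiment with the idea outside the tree, below
is a minimal standalone sketch (not part of the patch) of the bucket sort
used above: one counting pass over the high byte, suffix sums so the
buckets land in descending order, a stable scatter pass, then insertion
sort inside each bucket. The names demo_sort and N_BUCKETS are made up
for illustration.

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define N_BUCKETS 256

    /* Sort up to 1024 16-bit values in descending order. */
    static void
    demo_sort(uint16_t *v, int n)
    {
        uint16_t copy[1024];
        int      count[N_BUCKETS + 2] = {0};
        int      i, j, b;

        /* Pass 1: histogram of high bytes. */
        for (i = 0; i < n; i++)
            count[v[i] >> 8]++;

        /* Suffix sums: count[b+1] becomes the start of bucket b. */
        for (i = N_BUCKETS - 1; i != 0; i--)
            count[i - 1] += count[i];

        /* Pass 2: stable scatter; bucket boundaries shift up one slot. */
        for (i = 0; i < n; i++)
        {
            b = v[i] >> 8;
            copy[count[b + 1]++] = v[i];
        }

        /* Descending insertion sort inside each bucket longer than one. */
        for (i = N_BUCKETS; i > 0; i--)
        {
            int start = count[i + 1],
                end = count[i];

            for (j = start + 1; j < end; j++)
            {
                uint16_t tmp = copy[j];
                int      k = j;

                while (k > start && copy[k - 1] < tmp)
                {
                    copy[k] = copy[k - 1];
                    k--;
                }
                copy[k] = tmp;
            }
        }
        memcpy(v, copy, sizeof(uint16_t) * n);
    }

    int
    main(void)
    {
        uint16_t v[] = {8192, 60, 300, 8000, 299, 61};
        int      i;

        demo_sort(v, (int) (sizeof(v) / sizeof(v[0])));
        for (i = 0; i < 6; i++)
            printf("%u\n", v[i]);   /* prints 8192 8000 300 299 61 60 */
        return 0;
    }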
From 201f4b4ac07eebe05a4deb5ade42cd41ec74b8e8 Mon Sep 17 00:00:00 2001
From: Sokolov Yura <funny.fal...@postgrespro.ru>
Date: Mon, 15 May 2017 16:04:14 +0300
Subject: [PATCH 2/2] Simplify PageRepairFragmentation
On the assumption that a page usually doesn't become empty, merge the
body of the second loop (collecting items with storage) into the first
loop (which counts the kinds of items).
---
src/backend/storage/page/bufpage.c | 46 +++++++++++++++-----------------------
1 file changed, 18 insertions(+), 28 deletions(-)
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index c5630e808b..61738f241f 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -555,10 +555,11 @@ PageRepairFragmentation(Page page)
Offset pd_special = ((PageHeader) page)->pd_special;
ItemId lp;
int nline,
- nstorage,
nunused;
int i;
Size totallen;
+ itemIdSortData itemidbase[MaxHeapTuplesPerPage];
+ itemIdSort itemidptr = itemidbase;
/*
* It's worth the trouble to be more paranoid here than in most places,
@@ -578,14 +579,26 @@ PageRepairFragmentation(Page page)
pd_lower, pd_upper, pd_special)));
nline = PageGetMaxOffsetNumber(page);
- nunused = nstorage = 0;
+ nunused = totallen = 0;
for (i = FirstOffsetNumber; i <= nline; i++)
{
lp = PageGetItemId(page, i);
if (ItemIdIsUsed(lp))
{
if (ItemIdHasStorage(lp))
- nstorage++;
+ {
+ itemidptr->offsetindex = i - 1;
+ itemidptr->itemoff = ItemIdGetOffset(lp);
+ if (unlikely(itemidptr->itemoff < (int) pd_upper ||
+ itemidptr->itemoff >= (int) pd_special))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("corrupted item pointer: %u",
+ itemidptr->itemoff)));
+ itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
+ totallen += itemidptr->alignedlen;
+ itemidptr++;
+ }
}
else
{
@@ -595,7 +608,7 @@ PageRepairFragmentation(Page page)
}
}
- if (nstorage == 0)
+ if (itemidptr == itemidbase)
{
/* Page is completely empty, so just reset it quickly */
((PageHeader) page)->pd_upper = pd_special;
@@ -603,36 +616,13 @@ PageRepairFragmentation(Page page)
else
{
/* Need to compact the page the hard way */
- itemIdSortData itemidbase[MaxHeapTuplesPerPage];
- itemIdSort itemidptr = itemidbase;
-
- totallen = 0;
- for (i = 0; i < nline; i++)
- {
- lp = PageGetItemId(page, i + 1);
- if (ItemIdHasStorage(lp))
- {
- itemidptr->offsetindex = i;
- itemidptr->itemoff = ItemIdGetOffset(lp);
- if (itemidptr->itemoff < (int) pd_upper ||
- itemidptr->itemoff >= (int) pd_special)
- ereport(ERROR,
- (errcode(ERRCODE_DATA_CORRUPTED),
- errmsg("corrupted item pointer: %u",
- itemidptr->itemoff)));
- itemidptr->alignedlen = MAXALIGN(ItemIdGetLength(lp));
- totallen += itemidptr->alignedlen;
- itemidptr++;
- }
- }
-
if (totallen > (Size) (pd_special - pd_lower))
ereport(ERROR,
(errcode(ERRCODE_DATA_CORRUPTED),
errmsg("corrupted item lengths: total %u, available space %u",
(unsigned int) totallen, pd_special - pd_lower)));
- compactify_tuples(itemidbase, nstorage, page);
+ compactify_tuples(itemidbase, (int) (itemidptr - itemidbase), page);
}
/* Set hint bit for PageAddItem */
--
2.11.0
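To make the macro interface from the first patch concrete, here is a
hedged sketch of a caller of pg_shell_sort; the struct and comparator
(point, cmp_x) are invented for illustration and are not part of the
patch:

    typedef struct
    {
        int x;
        int y;
    } point;

    /* qsort-compatible signature, but expanded inline by the macro */
    static inline int
    cmp_x(const void *a, const void *b)
    {
        const point *pa = (const point *) a;
        const point *pb = (const point *) b;

        return (pa->x > pb->x) - (pa->x < pb->x);   /* ascending by x */
    }

    static void
    sort_points(point *pts, int n)
    {
        /* unlike qsort(), the comparator body can be inlined here */
        pg_shell_sort(point, pts, n, cmp_x);
    }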