Hello.

Just an updated commit message.

Thanks,
Michail.
From 934d649a18c66f8b448463e57375865b33ce53e7 Mon Sep 17 00:00:00 2001
From: nkey <n...@yandex-team.ru>
Date: Fri, 1 Apr 2022 02:17:55 +0300
Subject: [PATCH v5] Optimize KnownAssignedXidsGetAndSetXmin by maintaining
 offset to next valid xid.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently KnownAssignedXidsGetAndSetXmin requires an iterative loop through 
KnownAssignedXids array,
including xids marked as invalid. Performance impact is especially noticeable 
in the presence of
long (few seconds) transactions on primary, high value (few thousands) of 
max_connections and high
read workload on standby. Most of the cpu spent on looping throw 
KnownAssignedXids while almost all
xid are invalid anyway. KnownAssignedXidsCompress removes invalid xids from 
time to time,
but performance is still affected.

To increase performance we lazily maintain an offset in the 
KnownAssignedXidsNext array to skip known
to be invalid xids. KnownAssignedXidsNext does not always point to “next” valid 
xid, it is just some
offset safe to skip (known to be filled by only invalid xids).

It helps to skip the gaps and could significantly increase performance - up to 
10% more TPS on standby.

Thread: 
https://www.postgresql.org/message-id/flat/CALdSSPgahNUD_%3DpB_j%3D1zSnDBaiOtqVfzo8Ejt5J_k7qZiU1Tw%40mail.gmail.com

Benchmark:
https://www.postgresql.org/message-id/flat/CANtu0ohzBFTYwdLtcanWo4%2B794WWUi7LY2rnbHyorJdE8_ZnGg%40mail.gmail.com#379c1be7b8134ada5a574078d51b64c6
---
 src/backend/storage/ipc/procarray.c | 56 ++++++++++++++++++++++++-----
 1 file changed, 47 insertions(+), 9 deletions(-)

diff --git a/src/backend/storage/ipc/procarray.c 
b/src/backend/storage/ipc/procarray.c
index 13d192ec2b..b5cb3423fb 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -267,6 +267,7 @@ static PGPROC *allProcs;
  */
 static TransactionId *KnownAssignedXids;
 static bool *KnownAssignedXidsValid;
+static int32 *KnownAssignedXidsNext;
 static TransactionId latestObservedXid = InvalidTransactionId;
 
 /*
@@ -455,6 +456,12 @@ CreateSharedProcArray(void)
                        ShmemInitStruct("KnownAssignedXidsValid",
                                                        mul_size(sizeof(bool), 
TOTAL_MAX_CACHED_SUBXIDS),
                                                        &found);
+               KnownAssignedXidsNext = (int32 *)
+                               ShmemInitStruct("KnownAssignedXidsNext",
+                                                               
mul_size(sizeof(int32), TOTAL_MAX_CACHED_SUBXIDS),
+                                                               &found);
+               for (int i = 0; i < TOTAL_MAX_CACHED_SUBXIDS; i++)
+                       KnownAssignedXidsNext[i] = 1;
        }
 }
 
@@ -4544,7 +4551,13 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
  * XID entry itself.  This preserves the property that the XID entries are
  * sorted, so we can do binary searches easily.  Periodically we compress
  * out the unused entries; that's much cheaper than having to compress the
- * array immediately on every deletion.
+ * array immediately on every deletion. Also, we lazily maintain an offset
+ * in KnownAssignedXidsNext[] array to skip known to be invalid xids. It
+ * helps to skip the gaps; it could significantly increase performance in
+ * the case of long transactions on the primary. KnownAssignedXidsNext[] is
+ * updating while taking the snapshot. In general case KnownAssignedXidsNext
+ * contains not an offset to the next valid xid but a number which tends to
+ * the offset to next valid xid.
  *
  * The actually valid items in KnownAssignedXids[] and KnownAssignedXidsValid[]
  * are those with indexes tail <= i < head; items outside this subscript range
@@ -4582,7 +4595,7 @@ ExpireOldKnownAssignedTransactionIds(TransactionId xid)
  *             must happen)
  *     * Compressing the array is O(S) and requires exclusive lock
  *     * Removing an XID is O(logS) and requires exclusive lock
- *     * Taking a snapshot is O(S) and requires shared lock
+ *     * Taking a snapshot is O(S), O(N) next call; requires shared lock
  *     * Checking for an XID is O(logS) and requires shared lock
  *
  * In comparison, using a hash table for KnownAssignedXids would mean that
@@ -4642,12 +4655,13 @@ KnownAssignedXidsCompress(bool force)
         * re-aligning data to 0th element.
         */
        compress_index = 0;
-       for (i = tail; i < head; i++)
+       for (i = tail; i < head; i += KnownAssignedXidsNext[i])
        {
                if (KnownAssignedXidsValid[i])
                {
                        KnownAssignedXids[compress_index] = 
KnownAssignedXids[i];
                        KnownAssignedXidsValid[compress_index] = true;
+                       KnownAssignedXidsNext[compress_index] = 1;
                        compress_index++;
                }
        }
@@ -4750,6 +4764,7 @@ KnownAssignedXidsAdd(TransactionId from_xid, 
TransactionId to_xid,
        {
                KnownAssignedXids[head] = next_xid;
                KnownAssignedXidsValid[head] = true;
+               KnownAssignedXidsNext[head] = 1;
                TransactionIdAdvance(next_xid);
                head++;
        }
@@ -4965,7 +4980,7 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
        tail = pArray->tailKnownAssignedXids;
        head = pArray->headKnownAssignedXids;
 
-       for (i = tail; i < head; i++)
+       for (i = tail; i < head; i += KnownAssignedXidsNext[i])
        {
                if (KnownAssignedXidsValid[i])
                {
@@ -4988,7 +5003,7 @@ KnownAssignedXidsRemovePreceding(TransactionId removeXid)
        /*
         * Advance the tail pointer if we've marked the tail item invalid.
         */
-       for (i = tail; i < head; i++)
+       for (i = tail; i < head; i += KnownAssignedXidsNext[i])
        {
                if (KnownAssignedXidsValid[i])
                        break;
@@ -5038,7 +5053,9 @@ KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, 
TransactionId *xmin,
        int                     count = 0;
        int                     head,
                                tail;
-       int                     i;
+       int                     i,
+                               prev,
+                               prevOffset;
 
        /*
         * Fetch head just once, since it may change while we loop. We can stop
@@ -5052,9 +5069,12 @@ KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, 
TransactionId *xmin,
        SpinLockAcquire(&procArray->known_assigned_xids_lck);
        tail = procArray->tailKnownAssignedXids;
        head = procArray->headKnownAssignedXids;
+       prev = tail;
+       prevOffset = KnownAssignedXidsNext[prev];
        SpinLockRelease(&procArray->known_assigned_xids_lck);
 
-       for (i = tail; i < head; i++)
+
+       for (i = tail; i < head; i += KnownAssignedXidsNext[i])
        {
                /* Skip any gaps in the array */
                if (KnownAssignedXidsValid[i])
@@ -5079,6 +5099,24 @@ KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, 
TransactionId *xmin,
 
                        /* Add knownXid into output array */
                        xarray[count++] = knownXid;
+
+                       if (prev != i)
+                       {
+                               int32 n = i - prev;
+                               /*
+                                * Do not touch the cache if value is 
unchanged. This way we
+                                * can avoid additional cache miss.
+                                */
+                               if (n != prevOffset)
+                                       KnownAssignedXidsNext[prev] = n;
+                               /*
+                                * Remember this xid as previous valid. Also, 
manually store
+                                * prevOffset from current fetched value to 
avoid additional
+                                * atomic read.
+                                */
+                               prev = i;
+                               prevOffset = KnownAssignedXidsNext[i];
+                       }
                }
        }
 
@@ -5104,7 +5142,7 @@ KnownAssignedXidsGetOldestXmin(void)
        head = procArray->headKnownAssignedXids;
        SpinLockRelease(&procArray->known_assigned_xids_lck);
 
-       for (i = tail; i < head; i++)
+       for (i = tail; i < head; i += KnownAssignedXidsNext[i])
        {
                /* Skip any gaps in the array */
                if (KnownAssignedXidsValid[i])
@@ -5139,7 +5177,7 @@ KnownAssignedXidsDisplay(int trace_level)
 
        initStringInfo(&buf);
 
-       for (i = tail; i < head; i++)
+       for (i = tail; i < head; i += KnownAssignedXidsNext[i])
        {
                if (KnownAssignedXidsValid[i])
                {
-- 
2.17.1

Reply via email to