On Fri, 2010-04-16 at 13:00 +0100, Simon Riggs wrote:
> Almost done
Here's the finished patch.
Internal changes only, no functional or user visible changes, so no
docs, just detailed explanatory comments.
Expectation is
* performance no longer depends upon max_connections
* recovery will be about same or slightly faster
* snapshots should be about equivalent primary/standby
Main changes are
* lock free add to sorted array (equivalent to GetNewTransactionId)
* bsearch for xid removal at commit/abort/TransactionIdIsInProgress
* faster, more compact approach to snapshots, with self-trimming
* some minor API changes to facilitate above
* new code to ignore deletion failures only when !consistent
Erik,
Could you retest please?
--
Simon Riggs www.2ndQuadrant.com
*** a/src/backend/access/transam/twophase.c
--- b/src/backend/access/transam/twophase.c
***************
*** 1200,1205 **** StandbyTransactionIdIsPrepared(TransactionId xid)
--- 1200,1208 ----
Assert(TransactionIdIsValid(xid));
+ if (max_prepared_xacts <= 0)
+ return false; /* nothing to do */
+
/* Read and validate file */
buf = ReadTwoPhaseFile(xid, false);
if (buf == NULL)
*** a/src/backend/access/transam/xlog.c
--- b/src/backend/access/transam/xlog.c
***************
*** 6453,6458 **** CheckRecoveryConsistency(void)
--- 6453,6464 ----
}
}
+ bool
+ XLogConsistentState(void)
+ {
+ return reachedMinRecoveryPoint;
+ }
+
/*
* Is the system still in recovery?
*
*** a/src/backend/storage/ipc/procarray.c
--- b/src/backend/storage/ipc/procarray.c
***************
*** 68,73 **** typedef struct ProcArrayStruct
--- 68,77 ----
* xids */
int maxKnownAssignedXids; /* allocated size of known assigned
* xids */
+ int tailKnownAssignedXids; /* current tail of known assigned
+ * xids */
+ int headKnownAssignedXids; /* current head of known assigned
+ * xids */
/*
* Highest subxid that overflowed KnownAssignedXids array. Similar to
***************
*** 87,93 **** static ProcArrayStruct *procArray;
/*
* Bookkeeping for tracking emulated transactions in recovery
*/
! static HTAB *KnownAssignedXidsHash;
static TransactionId latestObservedXid = InvalidTransactionId;
/*
--- 91,98 ----
/*
* Bookkeeping for tracking emulated transactions in recovery
*/
! static TransactionId *KnownAssignedXids;
! static bool *KnownAssignedXidsValid;
static TransactionId latestObservedXid = InvalidTransactionId;
/*
***************
*** 142,150 **** static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax);
static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
TransactionId xmax);
static bool KnownAssignedXidsExist(TransactionId xid);
! static void KnownAssignedXidsAdd(TransactionId *xids, int nxids);
static void KnownAssignedXidsRemove(TransactionId xid);
! static void KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts);
static void KnownAssignedXidsDisplay(int trace_level);
/*
--- 147,158 ----
static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
TransactionId xmax);
static bool KnownAssignedXidsExist(TransactionId xid);
! static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, bool have_lock);
static void KnownAssignedXidsRemove(TransactionId xid);
! static void KnownAssignedXidsRemoveMany(TransactionId xid);
! static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
! TransactionId *subxids);
! static void KnownAssignedXidsCompress(bool full);
static void KnownAssignedXidsDisplay(int trace_level);
/*
***************
*** 204,228 **** CreateSharedProcArray(void)
procArray->numKnownAssignedXids = 0;
procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS;
procArray->lastOverflowedXid = InvalidTransactionId;
}
if (XLogRequestRecoveryConnections)
{
! /* Create or attach to the KnownAssignedXids hash table */
! HASHCTL info;
!
! MemSet(&info, 0, sizeof(info));
! info.keysize = sizeof(TransactionId);
! info.entrysize = sizeof(TransactionId);
! info.hash = tag_hash;
!
! KnownAssignedXidsHash = ShmemInitHash("KnownAssignedXids Hash",
! TOTAL_MAX_CACHED_SUBXIDS,
! TOTAL_MAX_CACHED_SUBXIDS,
! &info,
! HASH_ELEM | HASH_FUNCTION);
! if (!KnownAssignedXidsHash)
! elog(FATAL, "could not initialize known assigned xids hash table");
}
}
--- 212,231 ----
procArray->numKnownAssignedXids = 0;
procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS;
procArray->lastOverflowedXid = InvalidTransactionId;
+ procArray->tailKnownAssignedXids = 0;
+ procArray->headKnownAssignedXids = 0;
}
if (XLogRequestRecoveryConnections)
{
! KnownAssignedXids = (TransactionId *)
! ShmemInitStruct("KnownAssignedXids",
! mul_size(sizeof(TransactionId), TOTAL_MAX_CACHED_SUBXIDS),
! &found);
! KnownAssignedXidsValid = (bool *)
! ShmemInitStruct("KnownAssignedXidsValid",
! mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS),
! &found);
}
}
***************
*** 544,550 **** ProcArrayApplyRecoveryInfo(RunningTransactions running)
if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
continue;
! KnownAssignedXidsAdd(&xid, 1);
}
KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
--- 547,553 ----
if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid))
continue;
! KnownAssignedXidsAdd(xid, xid, true);
}
KnownAssignedXidsDisplay(trace_recovery(DEBUG3));
***************
*** 617,624 **** ProcArrayApplyXidAssignment(TransactionId topxid,
/*
* Remove from known-assigned-xacts.
*/
! for (i = 0; i < nsubxids; i++)
! KnownAssignedXidsRemove(subxids[i]);
/*
* Advance lastOverflowedXid when required.
--- 620,626 ----
/*
* Remove from known-assigned-xacts.
*/
! KnownAssignedXidsRemoveTree(InvalidTransactionId, nsubxids, subxids);
/*
* Advance lastOverflowedXid when required.
***************
*** 2162,2168 **** DisplayXidCache(void)
*
* During recovery we do not fret too much about the distinction between
* top-level xids and subtransaction xids. We hold both together in
! * a hash table called KnownAssignedXids. In backends, this is copied into
* snapshots in GetSnapshotData(), taking advantage
* of the fact that XidInMVCCSnapshot() doesn't care about the distinction
* either. Subtransaction xids are effectively treated as top-level xids
--- 2164,2170 ----
*
* During recovery we do not fret too much about the distinction between
* top-level xids and subtransaction xids. We hold both together in
! * a data structure called KnownAssignedXids. In backends, this is copied into
* snapshots in GetSnapshotData(), taking advantage
* of the fact that XidInMVCCSnapshot() doesn't care about the distinction
* either. Subtransaction xids are effectively treated as top-level xids
***************
*** 2190,2198 **** DisplayXidCache(void)
* transaction are not visible to backends in the standby.
* We prune KnownAssignedXids when XLOG_XACT_RUNNING_XACTS arrives, to
* ensure we do not overflow.
- *
- * If we are in STANDBY_SNAPSHOT_PENDING state, then we may try to remove
- * xids that are not present.
*/
void
RecordKnownAssignedTransactionIds(TransactionId xid)
--- 2192,2197 ----
***************
*** 2230,2274 **** RecordKnownAssignedTransactionIds(TransactionId xid)
*/
if (TransactionIdFollows(xid, latestObservedXid))
{
! TransactionId next_expected_xid = latestObservedXid;
!
! TransactionIdAdvance(next_expected_xid);
/*
! * Locking requirement is currently higher than for xid assignment in
! * normal running. However, we only get called here for new high xids
! * - so on a multi-processor where it is common that xids arrive out
! * of order the average number of locks per assignment will actually
! * reduce. So not too worried about this locking.
! *
! * XXX It does seem possible that we could add a whole range of
! * numbers atomically to KnownAssignedXids, if we use a sorted list
! * for KnownAssignedXids. But that design also increases the length of
! * time we hold lock when we process commits/aborts, so on balance
! * don't worry about this.
*/
! LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
!
while (TransactionIdPrecedesOrEquals(next_expected_xid, xid))
{
- if (TransactionIdPrecedes(next_expected_xid, xid))
- elog(trace_recovery(DEBUG4),
- "recording unobserved xid %u (latestObservedXid %u)",
- next_expected_xid, latestObservedXid);
- KnownAssignedXidsAdd(&next_expected_xid, 1);
-
- /*
- * Extend clog and subtrans like we do in GetNewTransactionId()
- * during normal operation
- */
ExtendCLOG(next_expected_xid);
ExtendSUBTRANS(next_expected_xid);
TransactionIdAdvance(next_expected_xid);
}
! LWLockRelease(ProcArrayLock);
latestObservedXid = xid;
}
--- 2229,2263 ----
*/
if (TransactionIdFollows(xid, latestObservedXid))
{
! TransactionId next_expected_xid;
/*
! * Extend clog and subtrans like we do in GetNewTransactionId()
! * during normal operation using individual extend steps.
! * Typical case requires almost no activity.
*/
! next_expected_xid = latestObservedXid;
! TransactionIdAdvance(next_expected_xid);
while (TransactionIdPrecedesOrEquals(next_expected_xid, xid))
{
ExtendCLOG(next_expected_xid);
ExtendSUBTRANS(next_expected_xid);
TransactionIdAdvance(next_expected_xid);
}
! /*
! * Add the new xids onto the KnownAssignedXids array. Note that
! * we don't need ProcArrayLock to do this, mimicing the locking
! * behaviour when assigning an xid in normal running.
! */
! next_expected_xid = latestObservedXid;
! TransactionIdAdvance(next_expected_xid);
! KnownAssignedXidsAdd(next_expected_xid, xid, false);
+ /*
+ * Now we can advance latestObservedXid
+ */
latestObservedXid = xid;
}
***************
*** 2285,2291 **** void
ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids,
TransactionId *subxids)
{
- int i;
TransactionId max_xid;
if (standbyState == STANDBY_DISABLED)
--- 2274,2279 ----
***************
*** 2298,2307 **** ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids,
*/
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
! if (TransactionIdIsValid(xid))
! KnownAssignedXidsRemove(xid);
! for (i = 0; i < nsubxids; i++)
! KnownAssignedXidsRemove(subxids[i]);
/* Like in ProcArrayRemove, advance latestCompletedXid */
if (TransactionIdFollowsOrEquals(max_xid,
--- 2286,2292 ----
*/
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
! KnownAssignedXidsRemoveTree(xid, nsubxids, subxids);
/* Like in ProcArrayRemove, advance latestCompletedXid */
if (TransactionIdFollowsOrEquals(max_xid,
***************
*** 2315,2321 **** void
ExpireAllKnownAssignedTransactionIds(void)
{
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
! KnownAssignedXidsRemoveMany(InvalidTransactionId, false);
LWLockRelease(ProcArrayLock);
}
--- 2300,2306 ----
ExpireAllKnownAssignedTransactionIds(void)
{
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
! KnownAssignedXidsRemoveMany(InvalidTransactionId);
LWLockRelease(ProcArrayLock);
}
***************
*** 2323,2354 **** void
ExpireOldKnownAssignedTransactionIds(TransactionId xid)
{
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
! KnownAssignedXidsRemoveMany(xid, true);
LWLockRelease(ProcArrayLock);
}
/*
* Private module functions to manipulate KnownAssignedXids
*
! * There are 3 main users of the KnownAssignedXids data structure:
*
! * * backends taking snapshots
* * startup process adding new knownassigned xids
* * startup process removing xids as transactions end
*
! * If we make KnownAssignedXids a simple sorted array then the first two
! * operations are fast, but the last one is at least O(N). If we make
! * KnownAssignedXids a hash table then the last two operations are fast,
! * though we have to do more work at snapshot time. Doing more work at
! * commit could slow down taking snapshots anyway because of lwlock
! * contention. Scanning the hash table is O(N) on the max size of the array,
! * so performs poorly in comparison when we have very low numbers of
! * write transactions to process. But at least it is constant overhead
! * and a sequential memory scan will utilise hardware memory readahead
! * to give much improved performance. In any case the emphasis must be on
! * having the standby process changes quickly so that it can provide
! * high availability. So we choose to implement as a hash table.
*/
/*
* Add xids into KnownAssignedXids.
--- 2308,2436 ----
ExpireOldKnownAssignedTransactionIds(TransactionId xid)
{
LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
! KnownAssignedXidsRemoveMany(xid);
LWLockRelease(ProcArrayLock);
}
/*
* Private module functions to manipulate KnownAssignedXids
*
! * There are 4 main users of the KnownAssignedXids data structure:
*
! * * backends taking snapshots - all valid xids are copied out
! * * backends seeking to determine presence of an xid
* * startup process adding new knownassigned xids
* * startup process removing xids as transactions end
*
! * We use a sorted array to store xids, always adding xids at head of array.
! * When we add xids we move the head of the array atomically, so we don't
! * need ProcArrayLock. The array grows until it hits the right end and then
! * we compress the array so it starts at zeroth element again.
! *
! * Maintaing a compact array of valid xids is costly, so we allow gaps to
! * form in the sequence so we can avoid compressing the contents after each
! * removal. Gaps are represented by tracking the validity of xids in a
! * separate array of booleans.
! *
! * If we have a maximum of M slots, with N xids currently spread across
! * S elements then we have N <= S <= M always.
! *
! * Removing xids from the array requires ExclusiveLock for the usual
! * transactional reasons. That allows us to left trim the list while only
! * holding ProcArrayLock in Shared mode by atomically updating the tail,
! * which is useful since snapshots scan the array from tail to head.
! * Trimming is useful since older xids are naturally removed from the tail
! * and in a steady state this works to keep S small, so snapshots
! * naturally reduce their own future costs.
! *
! * Keeping track of both head and tail allows a simple binary search for
! * both searching and removing. Since the array never wraps, we have
! * head >= tail always. When xids wrap we can continue to use this
! * technique though this only works because the number of xids we
! * store is less than half the maximum number of xids, so we only get
! * a single wrap. That is different from general user indexes where
! * bsearch is not possible because the number of rows is not sufficiently
! * limited.
! *
! * Algorithmic analysis
! * * Adding an xid is O(1) and is lock free (except when compressing)
! * * Removing an xid is O(logS)
! * * Taking snapshots is O(S) and is similar to normal running
! * * Pruning is mostly O(1)
! * * Locating an xid is essentially same as Removing
! *
! * In comparison, using a hash table for KnownAssignedXids would mean that
! * taking snapshots would be O(M). If we can maintain S << M then the
! * sorted array technique will deliver significantly faster snapshots.
! * If we try to keep S too small then we increase cost of removal
! * dramatically, so there is an optimal point for any workload mix. We
! * use a heuristic to decide when to compress the array, though trimming
! * also helps reduce frequency of compressing.
! *
! * procArray->tailKnownAssignedXids points to first currently valid xid
! * procArray->headKnownAssignedXids points to next xid insertion point,
! * which may be one off to right of array
! *
! * With carefully chosen compression we can also say that xids never move
! * right and that compression always maintains the sorted order.
! * Compression requires ProcArrayLock in Exclusive mode.
*/
+ #define RESULT_NOT_FOUND -1
+
+ /*
+ * Compress KnownAssignedXids by removing any gaps in virtual array.
+ *
+ * Must be called while holding ProcArrayLock in Exclusive mode.
+ */
+ static void
+ KnownAssignedXidsCompress(bool full)
+ {
+ int head = procArray->headKnownAssignedXids;
+ int tail = procArray->tailKnownAssignedXids;
+ int nelements = head - tail;
+ int compress_index;
+ int i;
+
+ if (!full)
+ {
+ /*
+ * If we can choose how much to compress, use a heuristic to
+ * avoid compressing too often or not often enough.
+ *
+ * Heuristic is if we have a large enough current spread and
+ * less than 50% of the elements are currently in use, then
+ * compress. This should ensure we compress fairly infrequently.
+ * We could compress less often though the virtual array would
+ * spread out more and snapshots would become more expensive.
+ */
+ if (nelements < 4 * PROCARRAY_MAXPROCS ||
+ nelements < 2 * procArray->numKnownAssignedXids)
+ return;
+ }
+
+ /*
+ * We compress the array by reading the valid values from tail
+ * to head, re-aligning array to 0th element. Note that we don't
+ * worry about zero-ing out the higher parts of the main or
+ * validity array since those will be overwritten when adding xids.
+ */
+ compress_index = 0;
+ for (i = tail; i < head; i++)
+ {
+ if (KnownAssignedXidsValid[i])
+ {
+ if (i != compress_index)
+ {
+ KnownAssignedXids[compress_index] = KnownAssignedXids[i];
+ KnownAssignedXidsValid[compress_index] = true;
+ }
+ compress_index++;
+ }
+ }
+ procArray->tailKnownAssignedXids = 0;
+ procArray->headKnownAssignedXids = compress_index;
+ KnownAssignedXidsValid[compress_index] = false;
+ }
/*
* Add xids into KnownAssignedXids.
***************
*** 2356,2413 **** ExpireOldKnownAssignedTransactionIds(TransactionId xid)
* Must be called while holding ProcArrayLock in Exclusive mode
*/
static void
! KnownAssignedXidsAdd(TransactionId *xids, int nxids)
{
! TransactionId *result;
! bool found;
int i;
for (i = 0; i < nxids; i++)
{
! Assert(TransactionIdIsValid(xids[i]));
! elog(trace_recovery(DEBUG4), "adding KnownAssignedXid %u", xids[i]);
! procArray->numKnownAssignedXids++;
! if (procArray->numKnownAssignedXids > procArray->maxKnownAssignedXids)
! {
! KnownAssignedXidsDisplay(LOG);
! LWLockRelease(ProcArrayLock);
! elog(ERROR, "too many KnownAssignedXids (%u)", procArray->maxKnownAssignedXids);
! }
! result = (TransactionId *) hash_search(KnownAssignedXidsHash, &xids[i], HASH_ENTER,
! &found);
! if (!result)
{
! LWLockRelease(ProcArrayLock);
! ereport(ERROR,
! (errcode(ERRCODE_OUT_OF_MEMORY),
! errmsg("out of shared memory")));
}
! if (found)
{
! KnownAssignedXidsDisplay(LOG);
! LWLockRelease(ProcArrayLock);
! elog(ERROR, "found duplicate KnownAssignedXid %u", xids[i]);
}
}
}
- /*
- * Is an xid present in KnownAssignedXids?
- *
- * Must be called while holding ProcArrayLock in shared mode
- */
static bool
KnownAssignedXidsExist(TransactionId xid)
{
! bool found;
! (void) hash_search(KnownAssignedXidsHash, &xid, HASH_FIND, &found);
! return found;
}
/*
--- 2438,2568 ----
* Must be called while holding ProcArrayLock in Exclusive mode
*/
static void
! KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, bool have_lock)
{
! TransactionId next_xid;
! int head = procArray->headKnownAssignedXids;
! int tail = procArray->tailKnownAssignedXids;
! int nxids = 1;
int i;
+ Assert(head >= 0 && head <= procArray->maxKnownAssignedXids);
+ Assert(tail >= 0 && tail < procArray->maxKnownAssignedXids);
+ Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid));
+
+ /*
+ * Calculate how many slots we need so we can test for overflow.
+ */
+ if (to_xid >= from_xid)
+ nxids = to_xid - from_xid + 1;
+ else
+ {
+ next_xid = from_xid;
+ while (TransactionIdPrecedes(next_xid, to_xid))
+ {
+ nxids++;
+ TransactionIdAdvance(next_xid);
+ }
+ }
+
+ /*
+ * If our xids won't fit easily then grab the lock and compress
+ */
+ if (head + nxids > procArray->maxKnownAssignedXids)
+ {
+ if (!have_lock)
+ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
+ KnownAssignedXidsCompress(true);
+ head = procArray->headKnownAssignedXids;
+ tail = procArray->tailKnownAssignedXids;
+ if (!have_lock)
+ LWLockRelease(ProcArrayLock);
+ }
+
+ /*
+ * If we still won't fit then we're out of memory
+ */
+ if (head + nxids > procArray->maxKnownAssignedXids)
+ elog(ERROR, "too many KnownAssignedXids");
+
+ next_xid = from_xid;
for (i = 0; i < nxids; i++)
{
! /* Insert the xid at head, since it already
! * points at the next insertion point
! */
! KnownAssignedXids[head] = next_xid;
! KnownAssignedXidsValid[head] = true;
! /* increment next_xid */
! TransactionIdAdvance(next_xid);
! head++;
! }
! /*
! * Set the new head of the ProcArray last, so it is atomic
! */
! procArray->numKnownAssignedXids += nxids;
! procArray->headKnownAssignedXids = head;
! }
! /*
! * KnownAssignedXidsSearch
! *
! * Searches KnownAssignedXids for a specific xid and optionally removes it.
! *
! * Must be called while holding ProcArrayLock in shared or exclusive mode.
! * Exclusive lock must be held for remove = true
! *
! * Use binary search to locate value, then test validity. Callers expect the
! * value to be present so we don't bother to test against head and tail.
! */
! static bool
! KnownAssignedXidsSearch(TransactionId xid, bool remove)
! {
! int first = procArray->tailKnownAssignedXids;
! int last = procArray->headKnownAssignedXids - 1;
! int result_index = RESULT_NOT_FOUND;
! int mid_index;
! TransactionId mid_xid;
!
! while (first <= last)
! {
! mid_index = first + (last - first) / 2;
! mid_xid = KnownAssignedXids[mid_index];
! if (xid == mid_xid)
{
! result_index = mid_index;
! break;
}
+ else if (TransactionIdPrecedes(xid, mid_xid))
+ last = mid_index - 1;
+ else
+ first = mid_index + 1;
+ }
! if (result_index == RESULT_NOT_FOUND)
! return false;
! else
! {
! if (KnownAssignedXidsValid[result_index])
{
! if (remove)
! KnownAssignedXidsValid[result_index] = false;
! return true;
}
+ else
+ return false;
}
}
static bool
KnownAssignedXidsExist(TransactionId xid)
{
! Assert(TransactionIdIsValid(xid));
! return KnownAssignedXidsSearch(xid, false);
}
/*
***************
*** 2418,2448 **** KnownAssignedXidsExist(TransactionId xid)
static void
KnownAssignedXidsRemove(TransactionId xid)
{
- bool found;
-
Assert(TransactionIdIsValid(xid));
elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
! (void) hash_search(KnownAssignedXidsHash, &xid, HASH_REMOVE, &found);
!
! if (found)
procArray->numKnownAssignedXids--;
! Assert(procArray->numKnownAssignedXids >= 0);
/*
* We can fail to find an xid if the xid came from a subtransaction that
* aborts, though the xid hadn't yet been reported and no WAL records have
* been written using the subxid. In that case the abort record will
* contain that subxid and we haven't seen it before.
- *
- * If we fail to find it for other reasons it might be a problem, but it
- * isn't much use to log that it happened, since we can't divine much from
- * just an isolated xid value.
*/
}
/*
* KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
* We filter out anything higher than xmax.
*
--- 2573,2640 ----
static void
KnownAssignedXidsRemove(TransactionId xid)
{
Assert(TransactionIdIsValid(xid));
elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid);
! if (KnownAssignedXidsSearch(xid, true))
procArray->numKnownAssignedXids--;
! else
! {
! int i;
! bool found = false;
!
! #ifdef USE_ASSERT_CHECKING
! for (i = 0; i < procArray->maxKnownAssignedXids; i++)
! {
! if (KnownAssignedXids[i] == xid && KnownAssignedXidsValid[i])
! {
! KnownAssignedXidsDisplay(LOG);
! elog(ERROR, "found error in search for xid=%u", xid);
! found = true;
! break;
! }
! }
! #endif
! if (!found && XLogConsistentState())
! elog(ERROR, "remove could not locate xid=%u", xid);
! }
!
! if (procArray->numKnownAssignedXids < 0)
! {
! KnownAssignedXidsDisplay(LOG);
! Assert(procArray->numKnownAssignedXids >= 0);
! }
/*
* We can fail to find an xid if the xid came from a subtransaction that
* aborts, though the xid hadn't yet been reported and no WAL records have
* been written using the subxid. In that case the abort record will
* contain that subxid and we haven't seen it before.
*/
}
/*
+ * KnownAssignedXidsRemoveTree
+ *
+ * Must be called while holding ProcArrayLock in exclusive mode.
+ */
+ static void
+ KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids,
+ TransactionId *subxids)
+ {
+ int i;
+
+ if (TransactionIdIsValid(xid))
+ KnownAssignedXidsRemove(xid);
+
+ for (i = 0; i < nsubxids; i++)
+ KnownAssignedXidsRemove(subxids[i]);
+
+ KnownAssignedXidsCompress(false);
+ }
+
+ /*
* KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids.
* We filter out anything higher than xmax.
*
***************
*** 2460,2491 **** KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax)
* KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus we reduce *xmin
* to the lowest xid value seen if not already lower.
*
! * Must be called while holding ProcArrayLock (in shared mode)
*/
static int
KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
TransactionId xmax)
{
! HASH_SEQ_STATUS status;
! TransactionId *knownXid;
int count = 0;
! hash_seq_init(&status, KnownAssignedXidsHash);
! while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
{
! /*
! * Filter out anything higher than xmax
! */
! if (TransactionIdPrecedes(xmax, *knownXid))
! continue;
! *xarray = *knownXid;
! xarray++;
! count++;
! /* update xmin if required */
! if (TransactionIdPrecedes(*knownXid, *xmin))
! *xmin = *knownXid;
}
return count;
--- 2652,2728 ----
* KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus we reduce *xmin
* to the lowest xid value seen if not already lower.
*
! * Must be called while holding ProcArrayLock, in shared mode or higher.
*/
static int
KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
TransactionId xmax)
{
! TransactionId knownXid;
int count = 0;
+ bool can_move_tail = true;
+ int head, i, tail, newtail;
+
+ Assert(TransactionIdIsValid(xmax));
+
+ /*
+ * Fetch the tail just once, since it may change while we loop.
+ */
+ tail = procArray->tailKnownAssignedXids;
+ newtail = tail;
+
+ /*
+ * Fetch the head just once, since it may change while we loop.
+ * We need only go as far as head at the start of the loop, since
+ * we are certain that an xid cannot enter and then leave the
+ * array without us knowing, so if we skip a few higher xids they
+ * will look like they were running anyway.
+ */
+ head = procArray->headKnownAssignedXids;
! for (i = tail; i < head; i++)
{
! /* Skip any gaps in the array */
! if (KnownAssignedXidsValid[i])
! {
! knownXid = KnownAssignedXids[i];
! /* Update xmin if required */
! if (count == 0 &&
! TransactionIdPrecedes(knownXid, *xmin))
! *xmin = knownXid;
!
! /*
! * Filter out anything higher than xmax, relying on
! * sorted property of array.
! */
! if (TransactionIdPrecedes(xmax, knownXid))
! break;
!
! /* Add knownXid onto output array */
! *xarray = knownXid;
! xarray++;
! count++;
! can_move_tail = false;
! }
! else if (can_move_tail)
! newtail = i;
! }
! /*
! * If we can simply prune the list, then do so.
! */
! if (newtail != tail)
! {
! /*
! * Move the tail of the buffer to the leftmost valid xid.
! * This action happens atomically, so no problem with locking.
! * It is possible that someone else has already done this, but
! * it's not possible that they moved it to a different place than
! * we will do because xids can only be removed while holding
! * ExclusiveLock, so that cannot be occurring concurrently.
! */
! procArray->tailKnownAssignedXids = newtail;
}
return count;
***************
*** 2498,2533 **** KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin,
* Must be called while holding ProcArrayLock in Exclusive mode.
*/
static void
! KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts)
{
! TransactionId *knownXid;
! HASH_SEQ_STATUS status;
! if (TransactionIdIsValid(xid))
! elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", xid);
! else
elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
! hash_seq_init(&status, KnownAssignedXidsHash);
! while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
! {
! TransactionId removeXid = *knownXid;
! bool found;
! if (!TransactionIdIsValid(xid) || TransactionIdPrecedes(removeXid, xid))
{
! if (keepPreparedXacts && StandbyTransactionIdIsPrepared(removeXid))
! continue;
! else
{
! (void) hash_search(KnownAssignedXidsHash, &removeXid,
! HASH_REMOVE, &found);
! if (found)
! procArray->numKnownAssignedXids--;
! Assert(procArray->numKnownAssignedXids >= 0);
}
}
}
}
/*
--- 2735,2781 ----
* Must be called while holding ProcArrayLock in Exclusive mode.
*/
static void
! KnownAssignedXidsRemoveMany(TransactionId removeXid)
{
! TransactionId knownXid;
! int count = 0;
! int head, tail, i;
! if (!TransactionIdIsValid(removeXid))
! {
elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids");
+ procArray->numKnownAssignedXids = 0;
+ procArray->headKnownAssignedXids = procArray->tailKnownAssignedXids = 0;
+ return;
+ }
! Assert(TransactionIdIsValid(removeXid));
! elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid);
!
! tail = procArray->tailKnownAssignedXids;
! head = procArray->headKnownAssignedXids;
! for (i = tail; i < head; i++)
! {
! if (KnownAssignedXidsValid[i])
{
! knownXid = KnownAssignedXids[i];
!
! if (TransactionIdFollowsOrEquals(knownXid, removeXid))
! break;
!
! if (!StandbyTransactionIdIsPrepared(knownXid))
{
! KnownAssignedXidsValid[i] = false;
! count++;
}
}
}
+
+ procArray->numKnownAssignedXids -= count;
+ Assert(procArray->numKnownAssignedXids >= 0);
+
+ KnownAssignedXidsCompress(false);
}
/*
***************
*** 2538,2565 **** KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts)
static void
KnownAssignedXidsDisplay(int trace_level)
{
! HASH_SEQ_STATUS status;
! TransactionId *knownXid;
! StringInfoData buf;
! TransactionId *xids;
! int nxids;
! int i;
!
! xids = palloc(sizeof(TransactionId) * TOTAL_MAX_CACHED_SUBXIDS);
! nxids = 0;
!
! hash_seq_init(&status, KnownAssignedXidsHash);
! while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL)
! xids[nxids++] = *knownXid;
! qsort(xids, nxids, sizeof(TransactionId), xidComparator);
initStringInfo(&buf);
! for (i = 0; i < nxids; i++)
! appendStringInfo(&buf, "%u ", xids[i]);
! elog(trace_level, "%d KnownAssignedXids %s", nxids, buf.data);
pfree(buf.data);
}
--- 2786,2815 ----
static void
KnownAssignedXidsDisplay(int trace_level)
{
! StringInfoData buf;
! int head, tail, i;
! int nxids = 0;
! tail = procArray->tailKnownAssignedXids;
! head = procArray->headKnownAssignedXids;
initStringInfo(&buf);
! for (i = tail; i < head; i++)
! {
! if (KnownAssignedXidsValid[i])
! {
! nxids++;
! appendStringInfo(&buf, "[%u]=%u ", i, KnownAssignedXids[i]);
! }
! }
! elog(trace_level, "%d KnownAssignedXids (num=%u tail=%u head=%u) %s",
! nxids,
! procArray->numKnownAssignedXids,
! procArray->tailKnownAssignedXids,
! procArray->headKnownAssignedXids,
! buf.data);
pfree(buf.data);
}
*** a/src/include/access/xlog.h
--- b/src/include/access/xlog.h
***************
*** 278,283 **** extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec);
--- 278,284 ----
extern void issue_xlog_fsync(int fd, uint32 log, uint32 seg);
+ extern bool XLogConsistentState(void);
extern bool RecoveryInProgress(void);
extern bool XLogInsertAllowed(void);
extern TimestampTz GetLatestXLogTime(void);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers