On Fri, 2010-04-16 at 13:00 +0100, Simon Riggs wrote: > Almost done Here's the finished patch.
Internal changes only, no functional or user visible changes, so no docs, just detailed explanatory comments. Expectation is * performance no longer depends upon max_connections * recovery will be about same or slightly faster * snapshots should be about equivalent primary/standby Main changes are * lock free add to sorted array (equivalent to GetNewTransactionId) * bsearch for xid removal at commit/abort/TransactionIdIsInProgress * faster, more compact approach to snapshots, with self-trimming * some minor API changes to facilitate above * new code to ignore deletion failures only when !consistent Erik, Could you retest please? -- Simon Riggs www.2ndQuadrant.com
*** a/src/backend/access/transam/twophase.c --- b/src/backend/access/transam/twophase.c *************** *** 1200,1205 **** StandbyTransactionIdIsPrepared(TransactionId xid) --- 1200,1208 ---- Assert(TransactionIdIsValid(xid)); + if (max_prepared_xacts <= 0) + return false; /* nothing to do */ + /* Read and validate file */ buf = ReadTwoPhaseFile(xid, false); if (buf == NULL) *** a/src/backend/access/transam/xlog.c --- b/src/backend/access/transam/xlog.c *************** *** 6453,6458 **** CheckRecoveryConsistency(void) --- 6453,6464 ---- } } + bool + XLogConsistentState(void) + { + return reachedMinRecoveryPoint; + } + /* * Is the system still in recovery? * *** a/src/backend/storage/ipc/procarray.c --- b/src/backend/storage/ipc/procarray.c *************** *** 68,73 **** typedef struct ProcArrayStruct --- 68,77 ---- * xids */ int maxKnownAssignedXids; /* allocated size of known assigned * xids */ + int tailKnownAssignedXids; /* current tail of known assigned + * xids */ + int headKnownAssignedXids; /* current head of known assigned + * xids */ /* * Highest subxid that overflowed KnownAssignedXids array. Similar to *************** *** 87,93 **** static ProcArrayStruct *procArray; /* * Bookkeeping for tracking emulated transactions in recovery */ ! static HTAB *KnownAssignedXidsHash; static TransactionId latestObservedXid = InvalidTransactionId; /* --- 91,98 ---- /* * Bookkeeping for tracking emulated transactions in recovery */ ! static TransactionId *KnownAssignedXids; ! static bool *KnownAssignedXidsValid; static TransactionId latestObservedXid = InvalidTransactionId; /* *************** *** 142,150 **** static int KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax); static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, TransactionId xmax); static bool KnownAssignedXidsExist(TransactionId xid); ! static void KnownAssignedXidsAdd(TransactionId *xids, int nxids); static void KnownAssignedXidsRemove(TransactionId xid); ! static void KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts); static void KnownAssignedXidsDisplay(int trace_level); /* --- 147,158 ---- static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, TransactionId xmax); static bool KnownAssignedXidsExist(TransactionId xid); ! static void KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, bool have_lock); static void KnownAssignedXidsRemove(TransactionId xid); ! static void KnownAssignedXidsRemoveMany(TransactionId xid); ! static void KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, ! TransactionId *subxids); ! static void KnownAssignedXidsCompress(bool full); static void KnownAssignedXidsDisplay(int trace_level); /* *************** *** 204,228 **** CreateSharedProcArray(void) procArray->numKnownAssignedXids = 0; procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS; procArray->lastOverflowedXid = InvalidTransactionId; } if (XLogRequestRecoveryConnections) { ! /* Create or attach to the KnownAssignedXids hash table */ ! HASHCTL info; ! ! MemSet(&info, 0, sizeof(info)); ! info.keysize = sizeof(TransactionId); ! info.entrysize = sizeof(TransactionId); ! info.hash = tag_hash; ! ! KnownAssignedXidsHash = ShmemInitHash("KnownAssignedXids Hash", ! TOTAL_MAX_CACHED_SUBXIDS, ! TOTAL_MAX_CACHED_SUBXIDS, ! &info, ! HASH_ELEM | HASH_FUNCTION); ! if (!KnownAssignedXidsHash) ! elog(FATAL, "could not initialize known assigned xids hash table"); } } --- 212,231 ---- procArray->numKnownAssignedXids = 0; procArray->maxKnownAssignedXids = TOTAL_MAX_CACHED_SUBXIDS; procArray->lastOverflowedXid = InvalidTransactionId; + procArray->tailKnownAssignedXids = 0; + procArray->headKnownAssignedXids = 0; } if (XLogRequestRecoveryConnections) { ! KnownAssignedXids = (TransactionId *) ! ShmemInitStruct("KnownAssignedXids", ! mul_size(sizeof(TransactionId), TOTAL_MAX_CACHED_SUBXIDS), ! &found); ! KnownAssignedXidsValid = (bool *) ! ShmemInitStruct("KnownAssignedXidsValid", ! mul_size(sizeof(bool), TOTAL_MAX_CACHED_SUBXIDS), ! &found); } } *************** *** 544,550 **** ProcArrayApplyRecoveryInfo(RunningTransactions running) if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) continue; ! KnownAssignedXidsAdd(&xid, 1); } KnownAssignedXidsDisplay(trace_recovery(DEBUG3)); --- 547,553 ---- if (TransactionIdDidCommit(xid) || TransactionIdDidAbort(xid)) continue; ! KnownAssignedXidsAdd(xid, xid, true); } KnownAssignedXidsDisplay(trace_recovery(DEBUG3)); *************** *** 617,624 **** ProcArrayApplyXidAssignment(TransactionId topxid, /* * Remove from known-assigned-xacts. */ ! for (i = 0; i < nsubxids; i++) ! KnownAssignedXidsRemove(subxids[i]); /* * Advance lastOverflowedXid when required. --- 620,626 ---- /* * Remove from known-assigned-xacts. */ ! KnownAssignedXidsRemoveTree(InvalidTransactionId, nsubxids, subxids); /* * Advance lastOverflowedXid when required. *************** *** 2162,2168 **** DisplayXidCache(void) * * During recovery we do not fret too much about the distinction between * top-level xids and subtransaction xids. We hold both together in ! * a hash table called KnownAssignedXids. In backends, this is copied into * snapshots in GetSnapshotData(), taking advantage * of the fact that XidInMVCCSnapshot() doesn't care about the distinction * either. Subtransaction xids are effectively treated as top-level xids --- 2164,2170 ---- * * During recovery we do not fret too much about the distinction between * top-level xids and subtransaction xids. We hold both together in ! * a data structure called KnownAssignedXids. In backends, this is copied into * snapshots in GetSnapshotData(), taking advantage * of the fact that XidInMVCCSnapshot() doesn't care about the distinction * either. Subtransaction xids are effectively treated as top-level xids *************** *** 2190,2198 **** DisplayXidCache(void) * transaction are not visible to backends in the standby. * We prune KnownAssignedXids when XLOG_XACT_RUNNING_XACTS arrives, to * ensure we do not overflow. - * - * If we are in STANDBY_SNAPSHOT_PENDING state, then we may try to remove - * xids that are not present. */ void RecordKnownAssignedTransactionIds(TransactionId xid) --- 2192,2197 ---- *************** *** 2230,2274 **** RecordKnownAssignedTransactionIds(TransactionId xid) */ if (TransactionIdFollows(xid, latestObservedXid)) { ! TransactionId next_expected_xid = latestObservedXid; ! ! TransactionIdAdvance(next_expected_xid); /* ! * Locking requirement is currently higher than for xid assignment in ! * normal running. However, we only get called here for new high xids ! * - so on a multi-processor where it is common that xids arrive out ! * of order the average number of locks per assignment will actually ! * reduce. So not too worried about this locking. ! * ! * XXX It does seem possible that we could add a whole range of ! * numbers atomically to KnownAssignedXids, if we use a sorted list ! * for KnownAssignedXids. But that design also increases the length of ! * time we hold lock when we process commits/aborts, so on balance ! * don't worry about this. */ ! LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); ! while (TransactionIdPrecedesOrEquals(next_expected_xid, xid)) { - if (TransactionIdPrecedes(next_expected_xid, xid)) - elog(trace_recovery(DEBUG4), - "recording unobserved xid %u (latestObservedXid %u)", - next_expected_xid, latestObservedXid); - KnownAssignedXidsAdd(&next_expected_xid, 1); - - /* - * Extend clog and subtrans like we do in GetNewTransactionId() - * during normal operation - */ ExtendCLOG(next_expected_xid); ExtendSUBTRANS(next_expected_xid); TransactionIdAdvance(next_expected_xid); } ! LWLockRelease(ProcArrayLock); latestObservedXid = xid; } --- 2229,2263 ---- */ if (TransactionIdFollows(xid, latestObservedXid)) { ! TransactionId next_expected_xid; /* ! * Extend clog and subtrans like we do in GetNewTransactionId() ! * during normal operation using individual extend steps. ! * Typical case requires almost no activity. */ ! next_expected_xid = latestObservedXid; ! TransactionIdAdvance(next_expected_xid); while (TransactionIdPrecedesOrEquals(next_expected_xid, xid)) { ExtendCLOG(next_expected_xid); ExtendSUBTRANS(next_expected_xid); TransactionIdAdvance(next_expected_xid); } ! /* ! * Add the new xids onto the KnownAssignedXids array. Note that ! * we don't need ProcArrayLock to do this, mimicing the locking ! * behaviour when assigning an xid in normal running. ! */ ! next_expected_xid = latestObservedXid; ! TransactionIdAdvance(next_expected_xid); ! KnownAssignedXidsAdd(next_expected_xid, xid, false); + /* + * Now we can advance latestObservedXid + */ latestObservedXid = xid; } *************** *** 2285,2291 **** void ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, TransactionId *subxids) { - int i; TransactionId max_xid; if (standbyState == STANDBY_DISABLED) --- 2274,2279 ---- *************** *** 2298,2307 **** ExpireTreeKnownAssignedTransactionIds(TransactionId xid, int nsubxids, */ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); ! if (TransactionIdIsValid(xid)) ! KnownAssignedXidsRemove(xid); ! for (i = 0; i < nsubxids; i++) ! KnownAssignedXidsRemove(subxids[i]); /* Like in ProcArrayRemove, advance latestCompletedXid */ if (TransactionIdFollowsOrEquals(max_xid, --- 2286,2292 ---- */ LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); ! KnownAssignedXidsRemoveTree(xid, nsubxids, subxids); /* Like in ProcArrayRemove, advance latestCompletedXid */ if (TransactionIdFollowsOrEquals(max_xid, *************** *** 2315,2321 **** void ExpireAllKnownAssignedTransactionIds(void) { LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); ! KnownAssignedXidsRemoveMany(InvalidTransactionId, false); LWLockRelease(ProcArrayLock); } --- 2300,2306 ---- ExpireAllKnownAssignedTransactionIds(void) { LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); ! KnownAssignedXidsRemoveMany(InvalidTransactionId); LWLockRelease(ProcArrayLock); } *************** *** 2323,2354 **** void ExpireOldKnownAssignedTransactionIds(TransactionId xid) { LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); ! KnownAssignedXidsRemoveMany(xid, true); LWLockRelease(ProcArrayLock); } /* * Private module functions to manipulate KnownAssignedXids * ! * There are 3 main users of the KnownAssignedXids data structure: * ! * * backends taking snapshots * * startup process adding new knownassigned xids * * startup process removing xids as transactions end * ! * If we make KnownAssignedXids a simple sorted array then the first two ! * operations are fast, but the last one is at least O(N). If we make ! * KnownAssignedXids a hash table then the last two operations are fast, ! * though we have to do more work at snapshot time. Doing more work at ! * commit could slow down taking snapshots anyway because of lwlock ! * contention. Scanning the hash table is O(N) on the max size of the array, ! * so performs poorly in comparison when we have very low numbers of ! * write transactions to process. But at least it is constant overhead ! * and a sequential memory scan will utilise hardware memory readahead ! * to give much improved performance. In any case the emphasis must be on ! * having the standby process changes quickly so that it can provide ! * high availability. So we choose to implement as a hash table. */ /* * Add xids into KnownAssignedXids. --- 2308,2436 ---- ExpireOldKnownAssignedTransactionIds(TransactionId xid) { LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); ! KnownAssignedXidsRemoveMany(xid); LWLockRelease(ProcArrayLock); } /* * Private module functions to manipulate KnownAssignedXids * ! * There are 4 main users of the KnownAssignedXids data structure: * ! * * backends taking snapshots - all valid xids are copied out ! * * backends seeking to determine presence of an xid * * startup process adding new knownassigned xids * * startup process removing xids as transactions end * ! * We use a sorted array to store xids, always adding xids at head of array. ! * When we add xids we move the head of the array atomically, so we don't ! * need ProcArrayLock. The array grows until it hits the right end and then ! * we compress the array so it starts at zeroth element again. ! * ! * Maintaing a compact array of valid xids is costly, so we allow gaps to ! * form in the sequence so we can avoid compressing the contents after each ! * removal. Gaps are represented by tracking the validity of xids in a ! * separate array of booleans. ! * ! * If we have a maximum of M slots, with N xids currently spread across ! * S elements then we have N <= S <= M always. ! * ! * Removing xids from the array requires ExclusiveLock for the usual ! * transactional reasons. That allows us to left trim the list while only ! * holding ProcArrayLock in Shared mode by atomically updating the tail, ! * which is useful since snapshots scan the array from tail to head. ! * Trimming is useful since older xids are naturally removed from the tail ! * and in a steady state this works to keep S small, so snapshots ! * naturally reduce their own future costs. ! * ! * Keeping track of both head and tail allows a simple binary search for ! * both searching and removing. Since the array never wraps, we have ! * head >= tail always. When xids wrap we can continue to use this ! * technique though this only works because the number of xids we ! * store is less than half the maximum number of xids, so we only get ! * a single wrap. That is different from general user indexes where ! * bsearch is not possible because the number of rows is not sufficiently ! * limited. ! * ! * Algorithmic analysis ! * * Adding an xid is O(1) and is lock free (except when compressing) ! * * Removing an xid is O(logS) ! * * Taking snapshots is O(S) and is similar to normal running ! * * Pruning is mostly O(1) ! * * Locating an xid is essentially same as Removing ! * ! * In comparison, using a hash table for KnownAssignedXids would mean that ! * taking snapshots would be O(M). If we can maintain S << M then the ! * sorted array technique will deliver significantly faster snapshots. ! * If we try to keep S too small then we increase cost of removal ! * dramatically, so there is an optimal point for any workload mix. We ! * use a heuristic to decide when to compress the array, though trimming ! * also helps reduce frequency of compressing. ! * ! * procArray->tailKnownAssignedXids points to first currently valid xid ! * procArray->headKnownAssignedXids points to next xid insertion point, ! * which may be one off to right of array ! * ! * With carefully chosen compression we can also say that xids never move ! * right and that compression always maintains the sorted order. ! * Compression requires ProcArrayLock in Exclusive mode. */ + #define RESULT_NOT_FOUND -1 + + /* + * Compress KnownAssignedXids by removing any gaps in virtual array. + * + * Must be called while holding ProcArrayLock in Exclusive mode. + */ + static void + KnownAssignedXidsCompress(bool full) + { + int head = procArray->headKnownAssignedXids; + int tail = procArray->tailKnownAssignedXids; + int nelements = head - tail; + int compress_index; + int i; + + if (!full) + { + /* + * If we can choose how much to compress, use a heuristic to + * avoid compressing too often or not often enough. + * + * Heuristic is if we have a large enough current spread and + * less than 50% of the elements are currently in use, then + * compress. This should ensure we compress fairly infrequently. + * We could compress less often though the virtual array would + * spread out more and snapshots would become more expensive. + */ + if (nelements < 4 * PROCARRAY_MAXPROCS || + nelements < 2 * procArray->numKnownAssignedXids) + return; + } + + /* + * We compress the array by reading the valid values from tail + * to head, re-aligning array to 0th element. Note that we don't + * worry about zero-ing out the higher parts of the main or + * validity array since those will be overwritten when adding xids. + */ + compress_index = 0; + for (i = tail; i < head; i++) + { + if (KnownAssignedXidsValid[i]) + { + if (i != compress_index) + { + KnownAssignedXids[compress_index] = KnownAssignedXids[i]; + KnownAssignedXidsValid[compress_index] = true; + } + compress_index++; + } + } + procArray->tailKnownAssignedXids = 0; + procArray->headKnownAssignedXids = compress_index; + KnownAssignedXidsValid[compress_index] = false; + } /* * Add xids into KnownAssignedXids. *************** *** 2356,2413 **** ExpireOldKnownAssignedTransactionIds(TransactionId xid) * Must be called while holding ProcArrayLock in Exclusive mode */ static void ! KnownAssignedXidsAdd(TransactionId *xids, int nxids) { ! TransactionId *result; ! bool found; int i; for (i = 0; i < nxids; i++) { ! Assert(TransactionIdIsValid(xids[i])); ! elog(trace_recovery(DEBUG4), "adding KnownAssignedXid %u", xids[i]); ! procArray->numKnownAssignedXids++; ! if (procArray->numKnownAssignedXids > procArray->maxKnownAssignedXids) ! { ! KnownAssignedXidsDisplay(LOG); ! LWLockRelease(ProcArrayLock); ! elog(ERROR, "too many KnownAssignedXids (%u)", procArray->maxKnownAssignedXids); ! } ! result = (TransactionId *) hash_search(KnownAssignedXidsHash, &xids[i], HASH_ENTER, ! &found); ! if (!result) { ! LWLockRelease(ProcArrayLock); ! ereport(ERROR, ! (errcode(ERRCODE_OUT_OF_MEMORY), ! errmsg("out of shared memory"))); } ! if (found) { ! KnownAssignedXidsDisplay(LOG); ! LWLockRelease(ProcArrayLock); ! elog(ERROR, "found duplicate KnownAssignedXid %u", xids[i]); } } } - /* - * Is an xid present in KnownAssignedXids? - * - * Must be called while holding ProcArrayLock in shared mode - */ static bool KnownAssignedXidsExist(TransactionId xid) { ! bool found; ! (void) hash_search(KnownAssignedXidsHash, &xid, HASH_FIND, &found); ! return found; } /* --- 2438,2568 ---- * Must be called while holding ProcArrayLock in Exclusive mode */ static void ! KnownAssignedXidsAdd(TransactionId from_xid, TransactionId to_xid, bool have_lock) { ! TransactionId next_xid; ! int head = procArray->headKnownAssignedXids; ! int tail = procArray->tailKnownAssignedXids; ! int nxids = 1; int i; + Assert(head >= 0 && head <= procArray->maxKnownAssignedXids); + Assert(tail >= 0 && tail < procArray->maxKnownAssignedXids); + Assert(TransactionIdPrecedesOrEquals(from_xid, to_xid)); + + /* + * Calculate how many slots we need so we can test for overflow. + */ + if (to_xid >= from_xid) + nxids = to_xid - from_xid + 1; + else + { + next_xid = from_xid; + while (TransactionIdPrecedes(next_xid, to_xid)) + { + nxids++; + TransactionIdAdvance(next_xid); + } + } + + /* + * If our xids won't fit easily then grab the lock and compress + */ + if (head + nxids > procArray->maxKnownAssignedXids) + { + if (!have_lock) + LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); + KnownAssignedXidsCompress(true); + head = procArray->headKnownAssignedXids; + tail = procArray->tailKnownAssignedXids; + if (!have_lock) + LWLockRelease(ProcArrayLock); + } + + /* + * If we still won't fit then we're out of memory + */ + if (head + nxids > procArray->maxKnownAssignedXids) + elog(ERROR, "too many KnownAssignedXids"); + + next_xid = from_xid; for (i = 0; i < nxids; i++) { ! /* Insert the xid at head, since it already ! * points at the next insertion point ! */ ! KnownAssignedXids[head] = next_xid; ! KnownAssignedXidsValid[head] = true; ! /* increment next_xid */ ! TransactionIdAdvance(next_xid); ! head++; ! } ! /* ! * Set the new head of the ProcArray last, so it is atomic ! */ ! procArray->numKnownAssignedXids += nxids; ! procArray->headKnownAssignedXids = head; ! } ! /* ! * KnownAssignedXidsSearch ! * ! * Searches KnownAssignedXids for a specific xid and optionally removes it. ! * ! * Must be called while holding ProcArrayLock in shared or exclusive mode. ! * Exclusive lock must be held for remove = true ! * ! * Use binary search to locate value, then test validity. Callers expect the ! * value to be present so we don't bother to test against head and tail. ! */ ! static bool ! KnownAssignedXidsSearch(TransactionId xid, bool remove) ! { ! int first = procArray->tailKnownAssignedXids; ! int last = procArray->headKnownAssignedXids - 1; ! int result_index = RESULT_NOT_FOUND; ! int mid_index; ! TransactionId mid_xid; ! ! while (first <= last) ! { ! mid_index = first + (last - first) / 2; ! mid_xid = KnownAssignedXids[mid_index]; ! if (xid == mid_xid) { ! result_index = mid_index; ! break; } + else if (TransactionIdPrecedes(xid, mid_xid)) + last = mid_index - 1; + else + first = mid_index + 1; + } ! if (result_index == RESULT_NOT_FOUND) ! return false; ! else ! { ! if (KnownAssignedXidsValid[result_index]) { ! if (remove) ! KnownAssignedXidsValid[result_index] = false; ! return true; } + else + return false; } } static bool KnownAssignedXidsExist(TransactionId xid) { ! Assert(TransactionIdIsValid(xid)); ! return KnownAssignedXidsSearch(xid, false); } /* *************** *** 2418,2448 **** KnownAssignedXidsExist(TransactionId xid) static void KnownAssignedXidsRemove(TransactionId xid) { - bool found; - Assert(TransactionIdIsValid(xid)); elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid); ! (void) hash_search(KnownAssignedXidsHash, &xid, HASH_REMOVE, &found); ! ! if (found) procArray->numKnownAssignedXids--; ! Assert(procArray->numKnownAssignedXids >= 0); /* * We can fail to find an xid if the xid came from a subtransaction that * aborts, though the xid hadn't yet been reported and no WAL records have * been written using the subxid. In that case the abort record will * contain that subxid and we haven't seen it before. - * - * If we fail to find it for other reasons it might be a problem, but it - * isn't much use to log that it happened, since we can't divine much from - * just an isolated xid value. */ } /* * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids. * We filter out anything higher than xmax. * --- 2573,2640 ---- static void KnownAssignedXidsRemove(TransactionId xid) { Assert(TransactionIdIsValid(xid)); elog(trace_recovery(DEBUG4), "remove KnownAssignedXid %u", xid); ! if (KnownAssignedXidsSearch(xid, true)) procArray->numKnownAssignedXids--; ! else ! { ! int i; ! bool found = false; ! ! #ifdef USE_ASSERT_CHECKING ! for (i = 0; i < procArray->maxKnownAssignedXids; i++) ! { ! if (KnownAssignedXids[i] == xid && KnownAssignedXidsValid[i]) ! { ! KnownAssignedXidsDisplay(LOG); ! elog(ERROR, "found error in search for xid=%u", xid); ! found = true; ! break; ! } ! } ! #endif ! if (!found && XLogConsistentState()) ! elog(ERROR, "remove could not locate xid=%u", xid); ! } ! ! if (procArray->numKnownAssignedXids < 0) ! { ! KnownAssignedXidsDisplay(LOG); ! Assert(procArray->numKnownAssignedXids >= 0); ! } /* * We can fail to find an xid if the xid came from a subtransaction that * aborts, though the xid hadn't yet been reported and no WAL records have * been written using the subxid. In that case the abort record will * contain that subxid and we haven't seen it before. */ } /* + * KnownAssignedXidsRemoveTree + * + * Must be called while holding ProcArrayLock in exclusive mode. + */ + static void + KnownAssignedXidsRemoveTree(TransactionId xid, int nsubxids, + TransactionId *subxids) + { + int i; + + if (TransactionIdIsValid(xid)) + KnownAssignedXidsRemove(xid); + + for (i = 0; i < nsubxids; i++) + KnownAssignedXidsRemove(subxids[i]); + + KnownAssignedXidsCompress(false); + } + + /* * KnownAssignedXidsGet - Get an array of xids by scanning KnownAssignedXids. * We filter out anything higher than xmax. * *************** *** 2460,2491 **** KnownAssignedXidsGet(TransactionId *xarray, TransactionId xmax) * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus we reduce *xmin * to the lowest xid value seen if not already lower. * ! * Must be called while holding ProcArrayLock (in shared mode) */ static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, TransactionId xmax) { ! HASH_SEQ_STATUS status; ! TransactionId *knownXid; int count = 0; ! hash_seq_init(&status, KnownAssignedXidsHash); ! while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL) { ! /* ! * Filter out anything higher than xmax ! */ ! if (TransactionIdPrecedes(xmax, *knownXid)) ! continue; ! *xarray = *knownXid; ! xarray++; ! count++; ! /* update xmin if required */ ! if (TransactionIdPrecedes(*knownXid, *xmin)) ! *xmin = *knownXid; } return count; --- 2652,2728 ---- * KnownAssignedXidsGetAndSetXmin - as KnownAssignedXidsGet, plus we reduce *xmin * to the lowest xid value seen if not already lower. * ! * Must be called while holding ProcArrayLock, in shared mode or higher. */ static int KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, TransactionId xmax) { ! TransactionId knownXid; int count = 0; + bool can_move_tail = true; + int head, i, tail, newtail; + + Assert(TransactionIdIsValid(xmax)); + + /* + * Fetch the tail just once, since it may change while we loop. + */ + tail = procArray->tailKnownAssignedXids; + newtail = tail; + + /* + * Fetch the head just once, since it may change while we loop. + * We need only go as far as head at the start of the loop, since + * we are certain that an xid cannot enter and then leave the + * array without us knowing, so if we skip a few higher xids they + * will look like they were running anyway. + */ + head = procArray->headKnownAssignedXids; ! for (i = tail; i < head; i++) { ! /* Skip any gaps in the array */ ! if (KnownAssignedXidsValid[i]) ! { ! knownXid = KnownAssignedXids[i]; ! /* Update xmin if required */ ! if (count == 0 && ! TransactionIdPrecedes(knownXid, *xmin)) ! *xmin = knownXid; ! ! /* ! * Filter out anything higher than xmax, relying on ! * sorted property of array. ! */ ! if (TransactionIdPrecedes(xmax, knownXid)) ! break; ! ! /* Add knownXid onto output array */ ! *xarray = knownXid; ! xarray++; ! count++; ! can_move_tail = false; ! } ! else if (can_move_tail) ! newtail = i; ! } ! /* ! * If we can simply prune the list, then do so. ! */ ! if (newtail != tail) ! { ! /* ! * Move the tail of the buffer to the leftmost valid xid. ! * This action happens atomically, so no problem with locking. ! * It is possible that someone else has already done this, but ! * it's not possible that they moved it to a different place than ! * we will do because xids can only be removed while holding ! * ExclusiveLock, so that cannot be occurring concurrently. ! */ ! procArray->tailKnownAssignedXids = newtail; } return count; *************** *** 2498,2533 **** KnownAssignedXidsGetAndSetXmin(TransactionId *xarray, TransactionId *xmin, * Must be called while holding ProcArrayLock in Exclusive mode. */ static void ! KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts) { ! TransactionId *knownXid; ! HASH_SEQ_STATUS status; ! if (TransactionIdIsValid(xid)) ! elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", xid); ! else elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids"); ! hash_seq_init(&status, KnownAssignedXidsHash); ! while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL) ! { ! TransactionId removeXid = *knownXid; ! bool found; ! if (!TransactionIdIsValid(xid) || TransactionIdPrecedes(removeXid, xid)) { ! if (keepPreparedXacts && StandbyTransactionIdIsPrepared(removeXid)) ! continue; ! else { ! (void) hash_search(KnownAssignedXidsHash, &removeXid, ! HASH_REMOVE, &found); ! if (found) ! procArray->numKnownAssignedXids--; ! Assert(procArray->numKnownAssignedXids >= 0); } } } } /* --- 2735,2781 ---- * Must be called while holding ProcArrayLock in Exclusive mode. */ static void ! KnownAssignedXidsRemoveMany(TransactionId removeXid) { ! TransactionId knownXid; ! int count = 0; ! int head, tail, i; ! if (!TransactionIdIsValid(removeXid)) ! { elog(trace_recovery(DEBUG4), "removing all KnownAssignedXids"); + procArray->numKnownAssignedXids = 0; + procArray->headKnownAssignedXids = procArray->tailKnownAssignedXids = 0; + return; + } ! Assert(TransactionIdIsValid(removeXid)); ! elog(trace_recovery(DEBUG4), "prune KnownAssignedXids to %u", removeXid); ! ! tail = procArray->tailKnownAssignedXids; ! head = procArray->headKnownAssignedXids; ! for (i = tail; i < head; i++) ! { ! if (KnownAssignedXidsValid[i]) { ! knownXid = KnownAssignedXids[i]; ! ! if (TransactionIdFollowsOrEquals(knownXid, removeXid)) ! break; ! ! if (!StandbyTransactionIdIsPrepared(knownXid)) { ! KnownAssignedXidsValid[i] = false; ! count++; } } } + + procArray->numKnownAssignedXids -= count; + Assert(procArray->numKnownAssignedXids >= 0); + + KnownAssignedXidsCompress(false); } /* *************** *** 2538,2565 **** KnownAssignedXidsRemoveMany(TransactionId xid, bool keepPreparedXacts) static void KnownAssignedXidsDisplay(int trace_level) { ! HASH_SEQ_STATUS status; ! TransactionId *knownXid; ! StringInfoData buf; ! TransactionId *xids; ! int nxids; ! int i; ! ! xids = palloc(sizeof(TransactionId) * TOTAL_MAX_CACHED_SUBXIDS); ! nxids = 0; ! ! hash_seq_init(&status, KnownAssignedXidsHash); ! while ((knownXid = (TransactionId *) hash_seq_search(&status)) != NULL) ! xids[nxids++] = *knownXid; ! qsort(xids, nxids, sizeof(TransactionId), xidComparator); initStringInfo(&buf); ! for (i = 0; i < nxids; i++) ! appendStringInfo(&buf, "%u ", xids[i]); ! elog(trace_level, "%d KnownAssignedXids %s", nxids, buf.data); pfree(buf.data); } --- 2786,2815 ---- static void KnownAssignedXidsDisplay(int trace_level) { ! StringInfoData buf; ! int head, tail, i; ! int nxids = 0; ! tail = procArray->tailKnownAssignedXids; ! head = procArray->headKnownAssignedXids; initStringInfo(&buf); ! for (i = tail; i < head; i++) ! { ! if (KnownAssignedXidsValid[i]) ! { ! nxids++; ! appendStringInfo(&buf, "[%u]=%u ", i, KnownAssignedXids[i]); ! } ! } ! elog(trace_level, "%d KnownAssignedXids (num=%u tail=%u head=%u) %s", ! nxids, ! procArray->numKnownAssignedXids, ! procArray->tailKnownAssignedXids, ! procArray->headKnownAssignedXids, ! buf.data); pfree(buf.data); } *** a/src/include/access/xlog.h --- b/src/include/access/xlog.h *************** *** 278,283 **** extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec); --- 278,284 ---- extern void issue_xlog_fsync(int fd, uint32 log, uint32 seg); + extern bool XLogConsistentState(void); extern bool RecoveryInProgress(void); extern bool XLogInsertAllowed(void); extern TimestampTz GetLatestXLogTime(void);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers