Here's a revamped version of this patch. One thing I didn't do here is
revert the exporting of CreateMultiXactId, but I don't see any way to
avoid that.
Andres mentioned the idea of sharing some code between
heap_prepare_freeze_tuple and heap_tuple_needs_freeze, but I haven't
explored that.
--
Álvaro Herrera http://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
***************
*** 5238,5251 **** heap_inplace_update(Relation relation, HeapTuple tuple)
CacheInvalidateHeapTuple(relation, tuple, NULL);
}
/*
! * heap_freeze_tuple
*
* Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
! * are older than the specified cutoff XID. If so, replace them with
! * FrozenTransactionId or InvalidTransactionId as appropriate, and return
! * TRUE. Return FALSE if nothing was changed.
*
* It is assumed that the caller has checked the tuple with
* HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD
--- 5238,5448 ----
CacheInvalidateHeapTuple(relation, tuple, NULL);
}
+ #define FRM_NOOP 0x0001
+ #define FRM_INVALIDATE_XMAX 0x0002
+ #define FRM_RETURN_IS_XID 0x0004
+ #define FRM_RETURN_IS_MULTI 0x0008
+ #define FRM_MARK_COMMITTED 0x0010
/*
! * FreezeMultiXactId
! * Determine what to do during freezing when a tuple is marked by a
! * MultiXactId.
! *
! * "flags" is an output value; it's used to tell caller what to do on return.
! *
! * Possible flags are:
! * FRM_NOOP
! * don't do anything -- keep existing Xmax
! * FRM_INVALIDATE_XMAX
! * mark Xmax as InvalidTransactionId and set XMAX_INVALID flag.
! * FRM_RETURN_IS_XID
! * The Xid return value is a single update Xid to set as xmax.
! * FRM_MARK_COMMITTED
! * Xmax can be marked as HEAP_XMAX_COMMITTED
! * FRM_RETURN_IS_MULTI
! * The return value is a new MultiXactId to set as new Xmax.
! * (caller must obtain proper infomask bits using GetMultiXactIdHintBits)
! */
! static TransactionId
! FreezeMultiXactId(MultiXactId multi, uint16 t_infomask,
! TransactionId cutoff_xid, MultiXactId cutoff_multi,
! uint16 *flags)
! {
! TransactionId xid = InvalidTransactionId;
! int i;
! MultiXactMember *members;
! int nmembers;
! bool need_replace;
! int nnewmembers;
! MultiXactMember *newmembers;
! bool has_lockers;
! TransactionId update_xid;
! bool update_committed;
!
! *flags = 0;
!
! if (!MultiXactIdIsValid(multi))
! {
! /* Ensure infomask bits are appropriately set/reset */
! *flags |= FRM_INVALIDATE_XMAX;
! return InvalidTransactionId;
! }
! else if (MultiXactIdPrecedes(multi, cutoff_multi))
! {
! /*
! * This old multi cannot possibly have members still running. If it
! * was a locker only, it can be removed without any further
! * consideration; but if it contained an update, we might need to
! * preserve it.
! */
! if (HEAP_XMAX_IS_LOCKED_ONLY(t_infomask))
! {
! *flags |= FRM_INVALIDATE_XMAX;
! return InvalidTransactionId;
! }
! else
! {
! /* replace multi by update xid */
! xid = MultiXactIdGetUpdateXid(multi, t_infomask);
!
! /* wasn't only a lock, xid needs to be valid */
! Assert(TransactionIdIsValid(xid));
!
! /*
! * If the xid is older than the cutoff, it has to have aborted,
! * otherwise the tuple would have gotten pruned away.
! */
! if (TransactionIdPrecedes(xid, cutoff_xid))
! {
! Assert(!TransactionIdDidCommit(xid));
! *flags |= FRM_INVALIDATE_XMAX;
! xid = InvalidTransactionId;
! }
! else
! *flags |= FRM_RETURN_IS_XID;
!
! return xid;
! }
! }
!
! /*
! * This multixact might have or might not have members still running,
! * but we know it's valid and is newer than the cutoff point for
! * multis. However, some member(s) of it may be below the cutoff for
! * Xids, so we need to walk the whole members array to figure out what
! * to do, if anything.
! */
!
! nmembers = GetMultiXactIdMembers(multi, &members, false);
! if (nmembers <= 0)
! {
! /* Nothing worth keeping */
! *flags |= FRM_INVALIDATE_XMAX;
! return InvalidTransactionId;
! }
!
! /* is there anything older than the cutoff? */
! need_replace = false;
! for (i = 0; i < nmembers; i++)
! {
! if (TransactionIdPrecedes(members[i].xid, cutoff_xid))
! {
! need_replace = true;
! break;
! }
! }
!
! /*
! * In the simplest case, there is no member older than the cutoff; we can
! * keep the existing MultiXactId as is.
! */
! if (!need_replace)
! {
! *flags |= FRM_NOOP;
! pfree(members);
! return InvalidTransactionId;
! }
!
! /*
! * If the multi needs to be updated, figure out which members we need
! * to keep.
! */
! nnewmembers = 0;
! newmembers = palloc(sizeof(MultiXactMember) * nmembers);
! has_lockers = false;
! update_xid = InvalidTransactionId;
! update_committed = false;
!
! for (i = 0; i < nmembers; i++)
! {
! if (ISUPDATE_from_mxstatus(members[i].status) &&
! !TransactionIdDidAbort(members[i].xid))
! {
! /* if it's an update, we must keep unless it aborted */
! newmembers[nnewmembers++] = members[i];
! Assert(!TransactionIdIsValid(update_xid));
! update_xid = members[i].xid;
! /* tell caller to set hint while we have the Xid in cache */
! if (TransactionIdDidCommit(update_xid))
! update_committed = true;
! }
!
! /* We only keep lockers if they are still running */
! else if (TransactionIdIsCurrentTransactionId(members[i].xid) ||
! TransactionIdIsInProgress(members[i].xid))
! {
! newmembers[nnewmembers++] = members[i];
! has_lockers = true;
! }
! }
!
! pfree(members);
!
! if (nnewmembers == 0)
! {
! /* nothing worth keeping!? Tell caller to remove the whole thing */
! *flags |= FRM_INVALIDATE_XMAX;
! xid = InvalidTransactionId;
! }
! else if (TransactionIdIsValid(update_xid) && !has_lockers)
! {
! /*
! * If there's a single member and it's an update, pass it back alone
! * without creating a new Multi. (XXX we could do this when there's a
! * single remaining locker, too, but that would complicate the API too
! * much; moreover, the case with the single updater is more
! * interesting, because those are longer-lived.)
! */
! Assert(nnewmembers == 1);
! *flags |= FRM_RETURN_IS_XID;
! if (update_committed)
! *flags |= FRM_MARK_COMMITTED;
! xid = update_xid;
! }
! else
! {
! /* Note this is WAL-logged */
! xid = CreateMultiXactId(nnewmembers, newmembers);
! *flags |= FRM_RETURN_IS_MULTI;
! }
!
! pfree(newmembers);
!
! return xid;
! }
!
! /*
! * heap_prepare_freeze_tuple
*
* Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
! * are older than the specified cutoff XID and cutoff MultiXactId. If so,
! * setup enough state (in the *frz output argument) to later execute and
! * WAL-log what we would need to do, and return TRUE. Return FALSE if nothing
! * is to be changed.
! *
! * Caller is responsible for setting the offset field, if appropriate. This
! * is only necessary if the freeze is to be WAL-logged.
*
* It is assumed that the caller has checked the tuple with
* HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD
***************
*** 5254,5307 **** heap_inplace_update(Relation relation, HeapTuple tuple)
* NB: cutoff_xid *must* be <= the current global xmin, to ensure that any
* XID older than it could neither be running nor seen as running by any
* open transaction. This ensures that the replacement will not change
! * anyone's idea of the tuple state. Also, since we assume the tuple is
! * not HEAPTUPLE_DEAD, the fact that an XID is not still running allows us
! * to assume that it is either committed good or aborted, as appropriate;
! * so we need no external state checks to decide what to do. (This is good
! * because this function is applied during WAL recovery, when we don't have
! * access to any such state, and can't depend on the hint bits to be set.)
! * There is an exception we make which is to assume GetMultiXactIdMembers can
! * be called during recovery.
! *
* Similarly, cutoff_multi must be less than or equal to the smallest
* MultiXactId used by any transaction currently open.
*
* If the tuple is in a shared buffer, caller must hold an exclusive lock on
* that buffer.
*
! * Note: it might seem we could make the changes without exclusive lock, since
! * TransactionId read/write is assumed atomic anyway. However there is a race
! * condition: someone who just fetched an old XID that we overwrite here could
! * conceivably not finish checking the XID against pg_clog before we finish
! * the VACUUM and perhaps truncate off the part of pg_clog he needs. Getting
! * exclusive lock ensures no other backend is in process of checking the
! * tuple status. Also, getting exclusive lock makes it safe to adjust the
! * infomask bits.
! *
! * NB: Cannot rely on hint bits here, they might not be set after a crash or
! * on a standby.
*/
bool
! heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
! MultiXactId cutoff_multi)
{
bool changed = false;
bool freeze_xmax = false;
TransactionId xid;
/* Process xmin */
xid = HeapTupleHeaderGetXmin(tuple);
if (TransactionIdIsNormal(xid) &&
TransactionIdPrecedes(xid, cutoff_xid))
{
! HeapTupleHeaderSetXmin(tuple, FrozenTransactionId);
!
/*
* Might as well fix the hint bits too; usually XMIN_COMMITTED will
* already be set here, but there's a small chance not.
*/
! Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
! tuple->t_infomask |= HEAP_XMIN_COMMITTED;
changed = true;
}
--- 5451,5492 ----
* NB: cutoff_xid *must* be <= the current global xmin, to ensure that any
* XID older than it could neither be running nor seen as running by any
* open transaction. This ensures that the replacement will not change
! * anyone's idea of the tuple state.
* Similarly, cutoff_multi must be less than or equal to the smallest
* MultiXactId used by any transaction currently open.
*
* If the tuple is in a shared buffer, caller must hold an exclusive lock on
* that buffer.
*
! * NB: It is not enough to set hint bits to indicate something is
! * committed/invalid -- they might not be set on a standby, or after crash
! * recovery. We really need to remove old xids.
*/
bool
! heap_prepare_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
! MultiXactId cutoff_multi, xl_heap_freeze_tuple *frz)
!
{
bool changed = false;
bool freeze_xmax = false;
TransactionId xid;
+ frz->frzflags = 0;
+ frz->t_infomask2 = tuple->t_infomask2;
+ frz->t_infomask = tuple->t_infomask;
+ frz->xmax = HeapTupleHeaderGetRawXmax(tuple);
+
/* Process xmin */
xid = HeapTupleHeaderGetXmin(tuple);
if (TransactionIdIsNormal(xid) &&
TransactionIdPrecedes(xid, cutoff_xid))
{
! frz->frzflags |= XLH_FREEZE_XMIN;
/*
* Might as well fix the hint bits too; usually XMIN_COMMITTED will
* already be set here, but there's a small chance not.
*/
! frz->t_infomask |= HEAP_XMIN_COMMITTED;
changed = true;
}
***************
*** 5318,5408 **** heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
! if (!MultiXactIdIsValid(xid))
! {
! /* no xmax set, ignore */
! ;
! }
! else if (MultiXactIdPrecedes(xid, cutoff_multi))
! {
! /*
! * This old multi cannot possibly be running. If it was a locker
! * only, it can be removed without much further thought; but if it
! * contained an update, we need to preserve it.
! */
! if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
! freeze_xmax = true;
! else
! {
! TransactionId update_xid;
! update_xid = HeapTupleGetUpdateXid(tuple);
! /*
! * The multixact has an update hidden within. Get rid of it.
! *
! * If the update_xid is below the cutoff_xid, it necessarily
! * must be an aborted transaction. In a primary server, such
! * an Xmax would have gotten marked invalid by
! * HeapTupleSatisfiesVacuum, but in a replica that is not
! * called before we are, so deal with it in the same way.
! *
! * If not below the cutoff_xid, then the tuple would have been
! * pruned by vacuum, if the update committed long enough ago,
! * and we wouldn't be freezing it; so it's either recently
! * committed, or in-progress. Deal with this by setting the
! * Xmax to the update Xid directly and remove the IS_MULTI
! * bit. (We know there cannot be running lockers in this
! * multi, because it's below the cutoff_multi value.)
! */
!
! if (TransactionIdPrecedes(update_xid, cutoff_xid))
! {
! Assert(InRecovery || TransactionIdDidAbort(update_xid));
! freeze_xmax = true;
! }
! else
! {
! Assert(InRecovery || !TransactionIdIsInProgress(update_xid));
! tuple->t_infomask &= ~HEAP_XMAX_BITS;
! HeapTupleHeaderSetXmax(tuple, update_xid);
! changed = true;
! }
! }
! }
! else if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
{
! /* newer than the cutoff, so don't touch it */
;
}
! else
{
! TransactionId update_xid;
! /*
! * This is a multixact which is not marked LOCK_ONLY, but which
! * is newer than the cutoff_multi. If the update_xid is below the
! * cutoff_xid point, then we can just freeze the Xmax in the
! * tuple, removing it altogether. This seems simple, but there
! * are several underlying assumptions:
! *
! * 1. A tuple marked by an multixact containing a very old
! * committed update Xid would have been pruned away by vacuum; we
! * wouldn't be freezing this tuple at all.
! *
! * 2. There cannot possibly be any live locking members remaining
! * in the multixact. This is because if they were alive, the
! * update's Xid would had been considered, via the lockers'
! * snapshot's Xmin, as part the cutoff_xid.
! *
! * 3. We don't create new MultiXacts via MultiXactIdExpand() that
! * include a very old aborted update Xid: in that function we only
! * include update Xids corresponding to transactions that are
! * committed or in-progress.
! */
! update_xid = HeapTupleGetUpdateXid(tuple);
! if (TransactionIdPrecedes(update_xid, cutoff_xid))
! freeze_xmax = true;
}
}
else if (TransactionIdIsNormal(xid) &&
--- 5503,5536 ----
if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
{
! TransactionId newxmax;
! uint16 flags;
! newxmax = FreezeMultiXactId(xid, tuple->t_infomask,
! cutoff_xid, cutoff_multi, &flags);
! if (flags & FRM_NOOP)
{
! /* nothing to do in this case */
;
}
! else if (flags & FRM_INVALIDATE_XMAX)
! freeze_xmax = true;
! else if (flags & FRM_RETURN_IS_XID)
{
! frz->t_infomask &= ~HEAP_XMAX_BITS;
! frz->xmax = newxmax;
! if (flags & FRM_MARK_COMMITTED)
! frz->t_infomask |= HEAP_XMAX_COMMITTED;
! }
! else if (flags & FRM_RETURN_IS_MULTI)
! {
! frz->t_infomask &= ~HEAP_XMAX_BITS;
! frz->xmax = newxmax;
! GetMultiXactIdHintBits(newxmax,
! &frz->t_infomask,
! &frz->t_infomask2);
}
}
else if (TransactionIdIsNormal(xid) &&
***************
*** 5413,5429 **** heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
if (freeze_xmax)
{
! HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);
/*
* The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
* LOCKED. Normalize to INVALID just to be sure no one gets confused.
* Also get rid of the HEAP_KEYS_UPDATED bit.
*/
! tuple->t_infomask &= ~HEAP_XMAX_BITS;
! tuple->t_infomask |= HEAP_XMAX_INVALID;
! HeapTupleHeaderClearHotUpdated(tuple);
! tuple->t_infomask2 &= ~HEAP_KEYS_UPDATED;
changed = true;
}
--- 5541,5557 ----
if (freeze_xmax)
{
! frz->xmax = InvalidTransactionId;
/*
* The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
* LOCKED. Normalize to INVALID just to be sure no one gets confused.
* Also get rid of the HEAP_KEYS_UPDATED bit.
*/
! frz->t_infomask &= ~HEAP_XMAX_BITS;
! frz->t_infomask |= HEAP_XMAX_INVALID;
! frz->t_infomask2 &= ~HEAP_HOT_UPDATED;
! frz->t_infomask2 &= ~HEAP_KEYS_UPDATED;
changed = true;
}
***************
*** 5443,5458 **** heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
* xvac transaction succeeded.
*/
if (tuple->t_infomask & HEAP_MOVED_OFF)
! HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
else
! HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);
/*
* Might as well fix the hint bits too; usually XMIN_COMMITTED
* will already be set here, but there's a small chance not.
*/
Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
! tuple->t_infomask |= HEAP_XMIN_COMMITTED;
changed = true;
}
}
--- 5571,5586 ----
* xvac transaction succeeded.
*/
if (tuple->t_infomask & HEAP_MOVED_OFF)
! frz->frzflags |= XLH_INVALID_XVAC;
else
! frz->frzflags |= XLH_FREEZE_XVAC;
/*
* Might as well fix the hint bits too; usually XMIN_COMMITTED
* will already be set here, but there's a small chance not.
*/
Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
! frz->t_infomask |= HEAP_XMIN_COMMITTED;
changed = true;
}
}
***************
*** 5461,5466 **** heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
--- 5589,5656 ----
}
/*
+ * heap_execute_freeze_tuple
+ * Execute the prepared freezing of a tuple.
+ *
+ * Caller is responsible for ensuring that no other backend can access the
+ * storage underlying this tuple, either by holding an exclusive lock on the
+ * buffer containing it (which is what lazy VACUUM does), or by having it
+ * in private storage (which is what CLUSTER and friends do).
+ *
+ * Note: it might seem we could make the changes without exclusive lock, since
+ * TransactionId read/write is assumed atomic anyway. However there is a race
+ * condition: someone who just fetched an old XID that we overwrite here could
+ * conceivably not finish checking the XID against pg_clog before we finish
+ * the VACUUM and perhaps truncate off the part of pg_clog he needs. Getting
+ * exclusive lock ensures no other backend is in process of checking the
+ * tuple status. Also, getting exclusive lock makes it safe to adjust the
+ * infomask bits.
+ *
+ * NB: All code in here must be safe to execute during crash recovery!
+ */
+ void
+ heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *frz)
+ {
+ if (frz->frzflags & XLH_FREEZE_XMIN)
+ HeapTupleHeaderSetXmin(tuple, FrozenTransactionId);
+
+ HeapTupleHeaderSetXmax(tuple, frz->xmax);
+
+ if (frz->frzflags & XLH_FREEZE_XVAC)
+ HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);
+
+ if (frz->frzflags & XLH_INVALID_XVAC)
+ HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
+
+ tuple->t_infomask = frz->t_infomask;
+ tuple->t_infomask2 = frz->t_infomask2;
+ }
+
+ /*
+ * heap_freeze_tuple - freeze tuple inplace without WAL logging.
+ *
+ * Useful for callers like CLUSTER that perform their own WAL logging.
+ */
+ bool
+ heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
+ MultiXactId cutoff_multi)
+ {
+ xl_heap_freeze_tuple frz;
+ bool do_freeze;
+
+ do_freeze = heap_prepare_freeze_tuple(tuple, cutoff_xid, cutoff_multi, &frz);
+
+ /*
+ * Note that because this is not a WAL-logged operation, we don't need
+ * to fill in the offset in the freeze record.
+ */
+
+ if (do_freeze)
+ heap_execute_freeze_tuple(tuple, &frz);
+ return do_freeze;
+ }
+
+ /*
* For a given MultiXactId, return the hint bits that should be set in the
* tuple's infomask.
*
***************
*** 5763,5778 **** heap_tuple_needs_freeze(HeapTupleHeader tuple, TransactionId cutoff_xid,
}
else if (MultiXactIdPrecedes(multi, cutoff_multi))
return true;
- else if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
- {
- /* only-locker multis don't need internal examination */
- ;
- }
else
{
! if (TransactionIdPrecedes(HeapTupleGetUpdateXid(tuple),
! cutoff_xid))
! return true;
}
}
else
--- 5953,5978 ----
}
else if (MultiXactIdPrecedes(multi, cutoff_multi))
return true;
else
{
! MultiXactMember *members;
! int nmembers;
! int i;
!
! /* need to check whether any member of the mxact is too old */
!
! nmembers = GetMultiXactIdMembers(multi, &members, false);
!
! for (i = 0; i < nmembers; i++)
! {
! if (TransactionIdPrecedes(members[i].xid, cutoff_xid))
! {
! pfree(members);
! return true;
! }
! }
! if (nmembers > 0)
! pfree(members);
}
}
else
***************
*** 6022,6048 **** log_heap_clean(Relation reln, Buffer buffer,
}
/*
! * Perform XLogInsert for a heap-freeze operation. Caller must already
! * have modified the buffer and marked it dirty.
*/
XLogRecPtr
! log_heap_freeze(Relation reln, Buffer buffer,
! TransactionId cutoff_xid, MultiXactId cutoff_multi,
! OffsetNumber *offsets, int offcnt)
{
! xl_heap_freeze xlrec;
XLogRecPtr recptr;
XLogRecData rdata[2];
/* Caller should not call me on a non-WAL-logged relation */
Assert(RelationNeedsWAL(reln));
/* nor when there are no tuples to freeze */
! Assert(offcnt > 0);
xlrec.node = reln->rd_node;
xlrec.block = BufferGetBlockNumber(buffer);
xlrec.cutoff_xid = cutoff_xid;
! xlrec.cutoff_multi = cutoff_multi;
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapFreeze;
--- 6222,6247 ----
}
/*
! * Perform XLogInsert for a heap-freeze operation. Caller must have already
! * modified the buffer and marked it dirty.
*/
XLogRecPtr
! log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
! xl_heap_freeze_tuple *tuples, int ntuples)
{
! xl_heap_freeze_page xlrec;
XLogRecPtr recptr;
XLogRecData rdata[2];
/* Caller should not call me on a non-WAL-logged relation */
Assert(RelationNeedsWAL(reln));
/* nor when there are no tuples to freeze */
! Assert(ntuples > 0);
xlrec.node = reln->rd_node;
xlrec.block = BufferGetBlockNumber(buffer);
xlrec.cutoff_xid = cutoff_xid;
! xlrec.ntuples = ntuples;
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapFreeze;
***************
*** 6050,6066 **** log_heap_freeze(Relation reln, Buffer buffer,
rdata[0].next = &(rdata[1]);
/*
! * The tuple-offsets array is not actually in the buffer, but pretend that
! * it is. When XLogInsert stores the whole buffer, the offsets array need
* not be stored too.
*/
! rdata[1].data = (char *) offsets;
! rdata[1].len = offcnt * sizeof(OffsetNumber);
rdata[1].buffer = buffer;
rdata[1].buffer_std = true;
rdata[1].next = NULL;
! recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE, rdata);
return recptr;
}
--- 6249,6265 ----
rdata[0].next = &(rdata[1]);
/*
! * The freeze plan array is not actually in the buffer, but pretend that
! * it is. When XLogInsert stores the whole buffer, the freeze plan need
* not be stored too.
*/
! rdata[1].data = (char *) tuples;
! rdata[1].len = ntuples * SizeOfHeapFreezeTuple;
rdata[1].buffer = buffer;
rdata[1].buffer_std = true;
rdata[1].next = NULL;
! recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE_PAGE, rdata);
return recptr;
}
***************
*** 6402,6407 **** heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
--- 6601,6699 ----
XLogRecordPageWithFreeSpace(xlrec->node, xlrec->block, freespace);
}
+ /*
+ * Freeze a single tuple for XLOG_HEAP2_FREEZE
+ *
+ * NB: This type of record isn't generated anymore, since bugs around
+ * multixacts couldn't be fixed without a more robust type of freezing. This
+ * is kept around to be able to perform PITR.
+ */
+ static bool
+ heap_xlog_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
+ MultiXactId cutoff_multi)
+ {
+ bool changed = false;
+ TransactionId xid;
+
+ xid = HeapTupleHeaderGetXmin(tuple);
+ if (TransactionIdIsNormal(xid) &&
+ TransactionIdPrecedes(xid, cutoff_xid))
+ {
+ HeapTupleHeaderSetXmin(tuple, FrozenTransactionId);
+
+ /*
+ * Might as well fix the hint bits too; usually XMIN_COMMITTED will
+ * already be set here, but there's a small chance not.
+ */
+ Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
+ tuple->t_infomask |= HEAP_XMIN_COMMITTED;
+ changed = true;
+ }
+
+ /*
+ * Note that this code handles IS_MULTI Xmax values, too, but only to mark
+ * the tuple as not updated if the multixact is below the cutoff Multixact
+ * given; it doesn't remove dead members of a very old multixact.
+ */
+ xid = HeapTupleHeaderGetRawXmax(tuple);
+ if ((tuple->t_infomask & HEAP_XMAX_IS_MULTI) ?
+ (MultiXactIdIsValid(xid) &&
+ MultiXactIdPrecedes(xid, cutoff_multi)) :
+ (TransactionIdIsNormal(xid) &&
+ TransactionIdPrecedes(xid, cutoff_xid)))
+ {
+ HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);
+
+ /*
+ * The tuple might be marked either XMAX_INVALID or XMAX_COMMITTED +
+ * LOCKED. Normalize to INVALID just to be sure no one gets confused.
+ * Also get rid of the HEAP_KEYS_UPDATED bit.
+ */
+ tuple->t_infomask &= ~HEAP_XMAX_BITS;
+ tuple->t_infomask |= HEAP_XMAX_INVALID;
+ HeapTupleHeaderClearHotUpdated(tuple);
+ tuple->t_infomask2 &= ~HEAP_KEYS_UPDATED;
+ changed = true;
+ }
+
+ /*
+ * Old-style VACUUM FULL is gone, but we have to keep this code as long as
+ * we support having MOVED_OFF/MOVED_IN tuples in the database.
+ */
+ if (tuple->t_infomask & HEAP_MOVED)
+ {
+ xid = HeapTupleHeaderGetXvac(tuple);
+ if (TransactionIdIsNormal(xid) &&
+ TransactionIdPrecedes(xid, cutoff_xid))
+ {
+ /*
+ * If a MOVED_OFF tuple is not dead, the xvac transaction must
+ * have failed; whereas a non-dead MOVED_IN tuple must mean the
+ * xvac transaction succeeded.
+ */
+ if (tuple->t_infomask & HEAP_MOVED_OFF)
+ HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
+ else
+ HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);
+
+ /*
+ * Might as well fix the hint bits too; usually XMIN_COMMITTED
+ * will already be set here, but there's a small chance not.
+ */
+ Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
+ tuple->t_infomask |= HEAP_XMIN_COMMITTED;
+ changed = true;
+ }
+ }
+
+ return changed;
+ }
+
+ /*
+ * NB: This type of record isn't generated anymore, since bugs around
+ * multixacts couldn't be fixed without a more robust type of freezing. This
+ * is kept around to be able to perform PITR.
+ */
static void
heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
{
***************
*** 6450,6456 **** heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
ItemId lp = PageGetItemId(page, *offsets);
HeapTupleHeader tuple = (HeapTupleHeader) PageGetItem(page, lp);
! (void) heap_freeze_tuple(tuple, cutoff_xid, cutoff_multi);
offsets++;
}
}
--- 6742,6748 ----
ItemId lp = PageGetItemId(page, *offsets);
HeapTupleHeader tuple = (HeapTupleHeader) PageGetItem(page, lp);
! (void) heap_xlog_freeze_tuple(tuple, cutoff_xid, cutoff_multi);
offsets++;
}
}
***************
*** 6574,6579 **** heap_xlog_visible(XLogRecPtr lsn, XLogRecord *record)
--- 6866,6928 ----
}
}
+ /*
+ * Replay XLOG_HEAP2_FREEZE_PAGE records
+ */
+ static void
+ heap_xlog_freeze_page(XLogRecPtr lsn, XLogRecord *record)
+ {
+ xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) XLogRecGetData(record);
+ TransactionId cutoff_xid = xlrec->cutoff_xid;
+ Buffer buffer;
+ Page page;
+ int ntup;
+
+ /*
+ * In Hot Standby mode, ensure that there's no queries running which still
+ * consider the frozen xids as running.
+ */
+ if (InHotStandby)
+ ResolveRecoveryConflictWithSnapshot(cutoff_xid, xlrec->node);
+
+ /* If we have a full-page image, restore it and we're done */
+ if (record->xl_info & XLR_BKP_BLOCK(0))
+ {
+ (void) RestoreBackupBlock(lsn, record, 0, false, false);
+ return;
+ }
+
+ buffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
+ if (!BufferIsValid(buffer))
+ return;
+
+ page = (Page) BufferGetPage(buffer);
+
+ if (lsn <= PageGetLSN(page))
+ {
+ UnlockReleaseBuffer(buffer);
+ return;
+ }
+
+ /* now execute freeze plan for each frozen tuple */
+ for (ntup = 0; ntup < xlrec->ntuples; ntup++)
+ {
+ xl_heap_freeze_tuple *xlrec_tp;
+ ItemId lp;
+ HeapTupleHeader tuple;
+
+ xlrec_tp = &xlrec->tuples[ntup];
+ lp = PageGetItemId(page, xlrec_tp->offset); /* offsets are one-based */
+ tuple = (HeapTupleHeader) PageGetItem(page, lp);
+
+ heap_execute_freeze_tuple(tuple, xlrec_tp);
+ }
+
+ PageSetLSN(page, lsn);
+ MarkBufferDirty(buffer);
+ UnlockReleaseBuffer(buffer);
+ }
+
static void
heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
{
***************
*** 7429,7434 **** heap2_redo(XLogRecPtr lsn, XLogRecord *record)
--- 7778,7786 ----
case XLOG_HEAP2_CLEAN:
heap_xlog_clean(lsn, record);
break;
+ case XLOG_HEAP2_FREEZE_PAGE:
+ heap_xlog_freeze_page(lsn, record);
+ break;
case XLOG_HEAP2_CLEANUP_INFO:
heap_xlog_cleanup_info(lsn, record);
break;
*** a/src/backend/access/rmgrdesc/heapdesc.c
--- b/src/backend/access/rmgrdesc/heapdesc.c
***************
*** 149,154 **** heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
--- 149,163 ----
xlrec->node.relNode, xlrec->block,
xlrec->latestRemovedXid);
}
+ if (info == XLOG_HEAP2_FREEZE_PAGE)
+ {
+ xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec;
+
+ appendStringInfo(buf, "freeze_page: rel %u/%u/%u; blk %u; cutoff xid %u ntuples %u",
+ xlrec->node.spcNode, xlrec->node.dbNode,
+ xlrec->node.relNode, xlrec->block,
+ xlrec->cutoff_xid, xlrec->ntuples);
+ }
else if (info == XLOG_HEAP2_CLEANUP_INFO)
{
xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec;
*** a/src/backend/access/rmgrdesc/mxactdesc.c
--- b/src/backend/access/rmgrdesc/mxactdesc.c
***************
*** 41,47 **** out_member(StringInfo buf, MultiXactMember *member)
appendStringInfoString(buf, "(upd) ");
break;
default:
! appendStringInfoString(buf, "(unk) ");
break;
}
}
--- 41,47 ----
appendStringInfoString(buf, "(upd) ");
break;
default:
! appendStringInfo(buf, "(unk: %d) ", (int) member->status);
break;
}
}
*** a/src/backend/access/transam/multixact.c
--- b/src/backend/access/transam/multixact.c
***************
*** 286,292 **** static MemoryContext MXactContext = NULL;
/* internal MultiXactId management */
static void MultiXactIdSetOldestVisible(void);
- static MultiXactId CreateMultiXactId(int nmembers, MultiXactMember *members);
static void RecordNewMultiXact(MultiXactId multi, MultiXactOffset offset,
int nmembers, MultiXactMember *members);
static MultiXactId GetNewMultiXactId(int nmembers, MultiXactOffset *offset);
--- 286,291 ----
***************
*** 672,678 **** ReadNextMultiXactId(void)
*
* NB: the passed members[] array will be sorted in-place.
*/
! static MultiXactId
CreateMultiXactId(int nmembers, MultiXactMember *members)
{
MultiXactId multi;
--- 671,677 ----
*
* NB: the passed members[] array will be sorted in-place.
*/
! MultiXactId
CreateMultiXactId(int nmembers, MultiXactMember *members)
{
MultiXactId multi;
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 424,429 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
--- 424,430 ----
Buffer vmbuffer = InvalidBuffer;
BlockNumber next_not_all_visible_block;
bool skipping_all_visible_blocks;
+ xl_heap_freeze_tuple *frozen;
pg_rusage_init(&ru0);
***************
*** 446,451 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
--- 447,453 ----
vacrelstats->latestRemovedXid = InvalidTransactionId;
lazy_space_alloc(vacrelstats, nblocks);
+ frozen = palloc(sizeof(xl_heap_freeze_tuple) * MaxHeapTuplesPerPage);
/*
* We want to skip pages that don't require vacuuming according to the
***************
*** 500,506 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
bool tupgone,
hastup;
int prev_dead_count;
- OffsetNumber frozen[MaxOffsetNumber];
int nfrozen;
Size freespace;
bool all_visible_according_to_vm;
--- 502,507 ----
***************
*** 893,901 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* Each non-removable tuple must be checked to see if it needs
* freezing. Note we already have exclusive buffer lock.
*/
! if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
! MultiXactCutoff))
! frozen[nfrozen++] = offnum;
}
} /* scan along page */
--- 894,902 ----
* Each non-removable tuple must be checked to see if it needs
* freezing. Note we already have exclusive buffer lock.
*/
! if (heap_prepare_freeze_tuple(tuple.t_data, FreezeLimit,
! MultiXactCutoff, &frozen[nfrozen]))
! frozen[nfrozen++].offset = offnum;
}
} /* scan along page */
***************
*** 906,920 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
*/
if (nfrozen > 0)
{
MarkBufferDirty(buf);
if (RelationNeedsWAL(onerel))
{
XLogRecPtr recptr;
recptr = log_heap_freeze(onerel, buf, FreezeLimit,
! MultiXactCutoff, frozen, nfrozen);
PageSetLSN(page, recptr);
}
}
/*
--- 907,939 ----
*/
if (nfrozen > 0)
{
+ START_CRIT_SECTION();
+
MarkBufferDirty(buf);
+
+ /* execute collected freezes */
+ for (i = 0; i < nfrozen; i++)
+ {
+ ItemId itemid;
+ HeapTupleHeader htup;
+
+ itemid = PageGetItemId(page, frozen[i].offset);
+ htup = (HeapTupleHeader) PageGetItem(page, itemid);
+
+ heap_execute_freeze_tuple(htup, &frozen[i]);
+ }
+
+ /* Now WAL-log freezing if necessary */
if (RelationNeedsWAL(onerel))
{
XLogRecPtr recptr;
recptr = log_heap_freeze(onerel, buf, FreezeLimit,
! frozen, nfrozen);
PageSetLSN(page, recptr);
}
+
+ END_CRIT_SECTION();
}
/*
***************
*** 1015,1020 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
--- 1034,1041 ----
RecordPageWithFreeSpace(onerel, blkno, freespace);
}
+ pfree(frozen);
+
/* save stats for use later */
vacrelstats->scanned_tuples = num_tuples;
vacrelstats->tuples_deleted = tups_vacuumed;
*** a/src/include/access/heapam_xlog.h
--- b/src/include/access/heapam_xlog.h
***************
*** 50,56 ****
*/
#define XLOG_HEAP2_FREEZE 0x00
#define XLOG_HEAP2_CLEAN 0x10
! /* 0x20 is free, was XLOG_HEAP2_CLEAN_MOVE */
#define XLOG_HEAP2_CLEANUP_INFO 0x30
#define XLOG_HEAP2_VISIBLE 0x40
#define XLOG_HEAP2_MULTI_INSERT 0x50
--- 50,56 ----
*/
#define XLOG_HEAP2_FREEZE 0x00
#define XLOG_HEAP2_CLEAN 0x10
! #define XLOG_HEAP2_FREEZE_PAGE 0x20
#define XLOG_HEAP2_CLEANUP_INFO 0x30
#define XLOG_HEAP2_VISIBLE 0x40
#define XLOG_HEAP2_MULTI_INSERT 0x50
***************
*** 239,245 **** typedef struct xl_heap_inplace
#define SizeOfHeapInplace (offsetof(xl_heap_inplace, target) + SizeOfHeapTid)
! /* This is what we need to know about tuple freezing during vacuum */
typedef struct xl_heap_freeze
{
RelFileNode node;
--- 239,245 ----
#define SizeOfHeapInplace (offsetof(xl_heap_inplace, target) + SizeOfHeapTid)
! /* This is what we need to know about tuple freezing during vacuum (legacy) */
typedef struct xl_heap_freeze
{
RelFileNode node;
***************
*** 251,256 **** typedef struct xl_heap_freeze
--- 251,289 ----
#define SizeOfHeapFreeze (offsetof(xl_heap_freeze, cutoff_multi) + sizeof(MultiXactId))
+ /*
+ * a 'freeze plan' struct that represents what we need to know about a single
+ * tuple being frozen during vacuum
+ */
+ #define XLH_FREEZE_XMIN 0x01
+ #define XLH_FREEZE_XVAC 0x02
+ #define XLH_INVALID_XVAC 0x04
+
+ typedef struct xl_heap_freeze_tuple
+ {
+ TransactionId xmax;
+ OffsetNumber offset;
+ uint16 t_infomask2;
+ uint16 t_infomask;
+ uint8 frzflags;
+ } xl_heap_freeze_tuple;
+
+ /* XXX we could define the size as offsetof(struct, frzflags) to save some
+ * padding, but then the flexible array below wouldn't work properly ... */
+ #define SizeOfHeapFreezeTuple sizeof(xl_heap_freeze_tuple)
+
+ /*
+ * This is what we need to know about a block being frozen during vacuum
+ */
+ typedef struct xl_heap_freeze_page
+ {
+ RelFileNode node;
+ BlockNumber block;
+ TransactionId cutoff_xid;
+ uint16 ntuples;
+ xl_heap_freeze_tuple tuples[FLEXIBLE_ARRAY_MEMBER];
+ } xl_heap_freeze_page;
+
/* This is what we need to know about setting a visibility map bit */
typedef struct xl_heap_visible
{
***************
*** 277,284 **** extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
OffsetNumber *nowunused, int nunused,
TransactionId latestRemovedXid);
extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
! TransactionId cutoff_xid, MultiXactId cutoff_multi,
! OffsetNumber *offsets, int offcnt);
extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
Buffer vm_buffer, TransactionId cutoff_xid);
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
--- 310,321 ----
OffsetNumber *nowunused, int nunused,
TransactionId latestRemovedXid);
extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
! TransactionId cutoff_xid, xl_heap_freeze_tuple *tuples, int ntuples);
! extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
! TransactionId cutoff_xid, TransactionId cutoff_multi,
! xl_heap_freeze_tuple *frz);
! extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
! xl_heap_freeze_tuple *xlrec_tp);
extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
Buffer vm_buffer, TransactionId cutoff_xid);
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
*** a/src/include/access/multixact.h
--- b/src/include/access/multixact.h
***************
*** 81,86 **** extern MultiXactId MultiXactIdCreate(TransactionId xid1,
--- 81,87 ----
MultiXactStatus status2);
extern MultiXactId MultiXactIdExpand(MultiXactId multi, TransactionId xid,
MultiXactStatus status);
+ extern MultiXactId CreateMultiXactId(int nmembers, MultiXactMember *members);
extern MultiXactId ReadNextMultiXactId(void);
extern bool MultiXactIdIsRunning(MultiXactId multi);
extern void MultiXactIdSetOldestMember(void);
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers