Andres Freund wrote: > I've played around quite some with the attached patch. So far, after > applying the second patch, neither VACUUM nor VACUUM FULL / CLUSTER make > the situation worse for already existing corruption. HOT pruning can > change the exact appearance of existing corruption a bit, but I don't > think it can make the corruption meaningfully worse. It's a bit > annoying and scary to add so many checks to backbranches but it kinda > seems required. The error message texts aren't perfect, but these are > "should never be hit" type elog()s so I'm not too worried about that.
Looking at 0002: I agree with the stuff being done here. I think a couple of these checks could be moved one block outward in terms of scope; I don't see any reason why the check should not apply in that case. I didn't catch any place missing additional checks. Despite these being "shouldn't happen" conditions, I think we should turn these up all the way to ereports with an errcode and all, and also report the XIDs being complained about. No translation required, though. Other than those changes and minor copy editing in a commit (attached), 0002 looks good to me. I started thinking it'd be good to report block number whenever anything happened while scanning the relation. The best way to go about this seems to be to add an errcontext callback to lazy_scan_heap, so I attach a WIP untested patch to add that. (I'm not proposing this for back-patch for now, mostly because I don't have the time/energy to push for it right now.) It appears that you got all the places that seem to reasonably need additional checks. -- Álvaro Herrera https://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
>From 9ac665638c86460f0b767628203f8bf28df35e49 Mon Sep 17 00:00:00 2001 From: Alvaro Herrera <alvhe...@alvh.no-ip.org> Date: Wed, 6 Dec 2017 16:44:25 -0300 Subject: [PATCH 1/2] tweaks for 0002 --- src/backend/access/heap/heapam.c | 67 +++++++++++++++++++++++++++------------ src/backend/commands/vacuumlazy.c | 6 ++-- 2 files changed, 50 insertions(+), 23 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index eb93718baa..5c284a4c32 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -6387,17 +6387,23 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, else if (MultiXactIdPrecedes(multi, cutoff_multi)) { if (MultiXactIdPrecedes(multi, relminmxid)) - elog(ERROR, "encountered multixact from before horizon"); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("found multixact %u from before relminmxid %u", + multi, relminmxid))); /* - * This old multi cannot possibly have members still running. If it - * was a locker only, it can be removed without any further - * consideration; but if it contained an update, we might need to - * preserve it. + * This old multi cannot possibly have members still running, but + * verify just in case. If it was a locker only, it can be removed + * without any further consideration; but if it contained an update, we + * might need to preserve it. 
*/ if (MultiXactIdIsRunning(multi, HEAP_XMAX_IS_LOCKED_ONLY(t_infomask))) - elog(ERROR, "multixact from before cutoff found to be still running"); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("multixact %u from before cutoff %u found to be still running", + multi, cutoff_multi))); if (HEAP_XMAX_IS_LOCKED_ONLY(t_infomask)) { @@ -6419,9 +6425,14 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, if (TransactionIdPrecedes(xid, cutoff_xid)) { if (TransactionIdPrecedes(xid, relfrozenxid)) - elog(ERROR, "encountered xid from before horizon"); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("found xid %u from before relfrozenxid %u", + xid, relfrozenxid))); if (TransactionIdDidCommit(xid)) - elog(ERROR, "can't freeze committed xmax"); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("can't freeze committed xmax %u", xid))); *flags |= FRM_INVALIDATE_XMAX; xid = InvalidTransactionId; /* not strictly necessary */ } @@ -6495,7 +6506,10 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, Assert(TransactionIdIsValid(xid)); if (TransactionIdPrecedes(xid, relfrozenxid)) - elog(ERROR, "encountered xid from before horizon"); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("found update xid %u from before relfrozenxid %u", + xid, relfrozenxid))); /* * It's an update; should we keep it? If the transaction is known @@ -6533,7 +6547,6 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, */ } - /* * Since the tuple wasn't marked HEAPTUPLE_DEAD by vacuum, the * update Xid cannot possibly be older than the xid cutoff. 
The @@ -6542,7 +6555,10 @@ FreezeMultiXactId(MultiXactId multi, uint16 t_infomask, */ if (TransactionIdIsValid(update_xid) && TransactionIdPrecedes(update_xid, cutoff_xid)) - elog(ERROR, "encountered xid from before xid cutoff"); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("found update xid %u from before xid cutoff %u", + update_xid, cutoff_xid))); /* * If we determined that it's an Xid corresponding to an update @@ -6658,13 +6674,19 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, xid = HeapTupleHeaderGetXmin(tuple); if (TransactionIdIsNormal(xid)) { + if (TransactionIdPrecedes(xid, relfrozenxid)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("found xid %u from before relfrozenxid %u", + xid, relfrozenxid))); + if (TransactionIdPrecedes(xid, cutoff_xid)) { - if (TransactionIdPrecedes(xid, relfrozenxid)) - elog(ERROR, "encountered xid from before xid cutoff"); - if (!TransactionIdDidCommit(xid)) - elog(ERROR, "uncommitted xmin needs to be frozen"); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg_internal("uncommitted Xmin %u from before xid cutoff %u needs to be frozen", + xid, cutoff_xid))); frz->t_infomask |= HEAP_XMIN_FROZEN; changed = true; @@ -6740,20 +6762,25 @@ heap_prepare_freeze_tuple(HeapTupleHeader tuple, } else if (TransactionIdIsNormal(xid)) { + if (TransactionIdPrecedes(xid, relfrozenxid)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("found xid %u from before relfrozenxid %u", + xid, relfrozenxid))); + if (TransactionIdPrecedes(xid, cutoff_xid)) { - if (TransactionIdPrecedes(xid, relfrozenxid)) - elog(ERROR, "encountered xid from before xid cutoff"); - /* * If we freeze xmax, make absolutely sure that it's not an XID - * that is important (Note, a lock-only xmax can be removed + * that is important. (Note, a lock-only xmax can be removed * independent of committedness, since a committed lock holder has * released the lock). 
*/ if (!(tuple->t_infomask & HEAP_XMAX_LOCK_ONLY) && TransactionIdDidCommit(xid)) - elog(ERROR, "can't freeze committed xmax"); + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("can't freeze committed xmax %u", xid))); freeze_xmax = true; } else diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index 7035a7a6f7..f95346acdb 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -1008,10 +1008,10 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, * to treat it like an indexed tuple. * * If this were to happen for a tuple that actually needed - * to be deleted , we'd be in trouble, because it'd + * to be deleted, we'd be in trouble, because it'd * possibly leave a tuple below the relation's xmin - * horizon alive. But checks in - * heap_prepare_freeze_tuple() would trigger in that case, + * horizon alive. heap_prepare_freeze_tuple() is prepared + * to detect that case and abort the transaction, * preventing corruption. */ if (HeapTupleIsHotUpdated(&tuple) || -- 2.11.0
>From 5bdc203b9eeda5e43546e0b77dd83d06894fb76b Mon Sep 17 00:00:00 2001 From: Alvaro Herrera <alvhe...@alvh.no-ip.org> Date: Wed, 6 Dec 2017 17:02:17 -0300 Subject: [PATCH 2/2] blkno/rel context info for lazy_scan_heap --- src/backend/commands/vacuumlazy.c | 41 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index f95346acdb..332adc6550 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -131,6 +131,14 @@ typedef struct LVRelStats bool lock_waiter_detected; } LVRelStats; +/* + * Struct to hold context info for lazy_scan_heap. + */ +typedef struct LazyScanHeapInfo +{ + Relation relation; + BlockNumber blkno; +} LazyScanHeapInfo; /* A few variables that don't seem worth passing around as parameters */ static int elevel = -1; @@ -146,6 +154,7 @@ static BufferAccessStrategy vac_strategy; static void lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, Relation *Irel, int nindexes, bool aggressive); +static void lazy_scan_heap_cb(void *arg); static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats); static bool lazy_check_needs_freeze(Buffer buf, bool *hastup); static void lazy_vacuum_index(Relation indrel, @@ -476,6 +485,8 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, nkeep, nunused; IndexBulkDeleteResult **indstats; + ErrorContextCallback callback; + LazyScanHeapInfo info; int i; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; @@ -526,6 +537,15 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, initprog_val[2] = vacrelstats->max_dead_tuples; pgstat_progress_update_multi_param(3, initprog_index, initprog_val); + /* set up our verbose error context callback */ + info.relation = onerel; + info.blkno = InvalidBlockNumber; + + callback.callback = lazy_scan_heap_cb; + callback.arg = &info; + callback.previous = error_context_stack; + error_context_stack 
= &callback; + /* * Except when aggressive is set, we want to skip pages that are * all-visible according to the visibility map, but only when we can skip @@ -616,6 +636,9 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, bool has_dead_tuples; TransactionId visibility_cutoff_xid = InvalidTransactionId; + /* update error callback info */ + info.blkno = blkno; + /* see note above about forcing scanning of last page */ #define FORCE_CHECK_PAGE() \ (blkno == nblocks - 1 && should_attempt_truncation(vacrelstats)) @@ -1275,6 +1298,8 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, RecordPageWithFreeSpace(onerel, blkno, freespace); } + error_context_stack = callback.previous; + /* report that everything is scanned and vacuumed */ pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); @@ -1384,6 +1409,22 @@ lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats, pfree(buf.data); } +/* + * lazy_scan_heap_cb + * Error context callback for lazy_scan_heap. + */ +static void +lazy_scan_heap_cb(void *arg) +{ + LazyScanHeapInfo *info = (LazyScanHeapInfo *) arg; + + if (info->blkno != InvalidBlockNumber) + errcontext("while scanning page %u of relation %s", + info->blkno, RelationGetRelationName(info->relation)); + else + errcontext("while vacuuming relation %s", + RelationGetRelationName(info->relation)); +} /* * lazy_vacuum_heap() -- second pass over the heap -- 2.11.0