Hi, On Mon, Apr 4, 2022 at 11:46 PM Andres Freund <and...@anarazel.de> wrote:
> > I think there's a few more things that'd be good to check. For example > amcheck > doesn't verify that HOT chains are reasonable, which can often be spotted > looking at an individual page. Which is a bit unfortunate, given how many > bugs > we had in that area. > > Stuff to check around that: > - target of redirect has HEAP_ONLY_TUPLE, HEAP_UPDATED set > - In a valid ctid chain within a page (i.e. xmax = xmin): > - tuples have HEAP_UPDATED set > - HEAP_ONLY_TUPLE / HEAP_HOT_UPDATED matches across chains elements (I changed the subject because the attached patch is related to HOT chain validation). Please find attached the patch with the above idea of HOT chain's validation(within a Page) and a few more validation as below. * If the predecessor’s xmin is aborted or in progress, the current tuples xmin should be aborted or in progress respectively. Also, Both xmin must be equal. * If the predecessor’s xmin is not frozen, then-current tuple’s shouldn’t be either. * If the predecessor’s xmin is equal to the current tuple’s xmin, the current tuple’s cmin should be greater than the predecessor’s xmin. * If the current tuple is not HOT then its predecessor’s tuple must not be HEAP_HOT_UPDATED. * If the current Tuple is HOT then its predecessor’s tuple must be HEAP_HOT_UPDATED and vice-versa. * If xmax is 0, which means it's the last tuple in the chain, then it must not be HEAP_HOT_UPDATED. * If the current tuple is the last tuple in the HOT chain then the next tuple should not be HOT. I am looking into the process of adding the TAP test for these changes and finding a way to corrupt a page in the tap test. Will try to include these test cases in my Upcoming version of the patch. -- Regards, Himanshu Upadhyaya EnterpriseDB: http://www.enterprisedb.com
From f3ae2f783a255109655cade770ebc74e01aef34c Mon Sep 17 00:00:00 2001 From: Himanshu Upadhyaya <himanshu.upadhyaya@enterprisedb.com> Date: Thu, 18 Aug 2022 13:20:51 +0530 Subject: [PATCH v1] HOT chain validation in verify_heapam. --- contrib/amcheck/verify_heapam.c | 144 ++++++++++++++++++++++++++++++++ 1 file changed, 144 insertions(+) diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c index c875f3e5a2..ae2c100de2 100644 --- a/contrib/amcheck/verify_heapam.c +++ b/contrib/amcheck/verify_heapam.c @@ -399,6 +399,7 @@ verify_heapam(PG_FUNCTION_ARGS) for (ctx.blkno = first_block; ctx.blkno <= last_block; ctx.blkno++) { OffsetNumber maxoff; + OffsetNumber predecessor[MaxOffsetNumber] = {0}; CHECK_FOR_INTERRUPTS(); @@ -433,6 +434,9 @@ verify_heapam(PG_FUNCTION_ARGS) for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff; ctx.offnum = OffsetNumberNext(ctx.offnum)) { + OffsetNumber nextTupOffnum; + HeapTupleHeader htup; + ctx.itemid = PageGetItemId(ctx.page, ctx.offnum); /* Skip over unused/dead line pointers */ @@ -469,6 +473,13 @@ verify_heapam(PG_FUNCTION_ARGS) report_corruption(&ctx, psprintf("line pointer redirection to unused item at offset %u", (unsigned) rdoffnum)); + + htup = (HeapTupleHeader) PageGetItem(ctx.page, rditem); + if (!(HeapTupleHeaderIsHeapOnly(htup) && htup->t_infomask & HEAP_UPDATED)) + report_corruption(&ctx, + psprintf("Redirected Tuple at line pointer offset %u is not HEAP_ONLY_TUPLE or HEAP_UPDATED tuple", + (unsigned) rdoffnum)); + continue; } @@ -507,6 +518,139 @@ verify_heapam(PG_FUNCTION_ARGS) /* Ok, ready to check this next tuple */ check_tuple(&ctx); + /* + * Add line pointer offset to predecessor array. + * 1) If xmax is matching with xmin of next Tuple(reaching via its t_ctid). + * 2) If next tuple is in the same page. + * Raise corruption if: + * We have two tuples having same predecessor. + * + * We add offset to predecessor irrespective of transaction(t_xmin) status. We will + * do validation related to transaction status(and also all other validations) + * when we loop over predecessor array. + */ + nextTupOffnum = ItemPointerGetOffsetNumber(&(ctx.tuphdr)->t_ctid); + if (ItemPointerGetBlockNumber(&(ctx.tuphdr)->t_ctid) == ctx.blkno && + nextTupOffnum != ctx.offnum) + { + TransactionId currTupXmax; + ItemId lp; + + if (predecessor[nextTupOffnum] != 0) + { + report_corruption(&ctx, + psprintf("Tuple at offset %u is reachable from two or more updated tuple", + (unsigned) nextTupOffnum)); + continue; + } + currTupXmax = HeapTupleHeaderGetUpdateXid(ctx.tuphdr); + lp = PageGetItemId(ctx.page, nextTupOffnum); + + htup = (HeapTupleHeader) PageGetItem(ctx.page, lp); + + if (TransactionIdIsValid(currTupXmax) && + TransactionIdEquals(HeapTupleHeaderGetXmin(htup), currTupXmax)) + { + predecessor[nextTupOffnum] = ctx.offnum; + } + /* Non matching XMAX with XMIN is not a corruption*/ + } + + } + /* Now loop over offset and validate data in predecessor array.*/ + for ( ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff; + ctx.offnum = OffsetNumberNext(ctx.offnum)) + { + HeapTupleHeader pred_htup, curr_htup; + TransactionId pred_xmin, curr_xmin, curr_xmax; + ItemId pred_lp, curr_lp; + + OffsetNumber predoffset = predecessor[ctx.offnum]; + if (!predoffset) + { + /* + * Either root of chain or xmin-aborted tuple from an abandoned portion of + * a HOT chain. + */ + continue; + } + /* + * Validation via predecessor array: + * 1) If the predecessor’s xmin is aborted or in progress, the current + * tuples xmin should be aborted or in progress respectively. Also + * Both xmin must be equal. + * 2) If the predecessor’s xmin is not frozen, then current tuple’s + * shouldn’t be either. + * 3) If the predecessor’s xmin is equal to the current tuple’s xmin, + * the current tuple’s cmin should be greater than predecessor’s xmin. + * 4) If the current tuple is not HOT then its predecessor’s tuple must + * not be HEAP_HOT_UPDATED. + * 5) If the current Tuple is HOT then its predecessor’s tuple must be + * HEAP_HOT_UPDATED and vice-versa. + * 6) If xmax is 0, means it's last tuple in chain, then it must not + * be HEAP_HOT_UPDATED. + * 7) If current tuple is the last tuple in HOT chain then next tuple + * should not be HOT. + */ + curr_lp = PageGetItemId(ctx.page, ctx.offnum); + curr_htup = (HeapTupleHeader) PageGetItem(ctx.page, curr_lp); + curr_xmin = HeapTupleHeaderGetXmin(curr_htup); + curr_xmax = HeapTupleHeaderGetUpdateXid(curr_htup); + + pred_lp = PageGetItemId(ctx.page, predoffset); + pred_htup = (HeapTupleHeader) PageGetItem(ctx.page, pred_lp); + pred_xmin = HeapTupleHeaderGetXmin(pred_htup); + + if ((TransactionIdDidAbort(pred_xmin)||TransactionIdIsInProgress(pred_xmin)) + && !TransactionIdEquals(pred_xmin, curr_xmin)) + { + report_corruption(&ctx, + psprintf("Preceding tuple's(at offset %u) xmin is aborted or in-progress but is not equal to current tuple xmin at offset %u", + (unsigned) predoffset, (unsigned) ctx.offnum)); + } + if (pred_xmin != FrozenTransactionId && curr_xmin == FrozenTransactionId) + { + report_corruption(&ctx, + psprintf("Non frozen-xmin tuple precedes current frozen tuple at offset %u", + (unsigned) ctx.offnum)); + } + if (pred_xmin == curr_xmin && + HeapTupleHeaderGetCmin(pred_htup) >= + HeapTupleHeaderGetCmin(curr_htup)) + { + report_corruption(&ctx, + psprintf("Predecessor tuple was also inserted by %u transaction but cmin of predecessor's tuple does not precede current tuple.", + (unsigned) pred_xmin)); + } + if (HeapTupleHeaderIsHeapOnly(curr_htup) && + !HeapTupleHeaderIsHotUpdated(pred_htup)) + { + report_corruption(&ctx, + psprintf("Current tuple at offset %u is HOT but its predecessor is not HEAP HOT UPDATED.", + (unsigned) ctx.offnum)); + } + if (!HeapTupleHeaderIsHeapOnly(curr_htup) && + HeapTupleHeaderIsHotUpdated(pred_htup)) + { + report_corruption(&ctx, + psprintf("Current tuple at offset %u is not HOT but its predecessor is HEAP HOT UPDATED.", + (unsigned) ctx.offnum)); + } + if (!TransactionIdIsValid(curr_xmax) && + HeapTupleHeaderIsHotUpdated(curr_htup)) + { + report_corruption(&ctx, + psprintf("Current tuple at offset %u is HOT but is last tuple in the HOT chain.", + (unsigned) ctx.offnum)); + } + if (!HeapTupleHeaderIsHotUpdated(pred_htup) && + HeapTupleHeaderIsHeapOnly(pred_htup) && + HeapTupleHeaderIsHeapOnly(curr_htup)) + { + report_corruption(&ctx, + psprintf("Current tuple at offset %u is HOT but it is next updated tuple of last Tuple in HOT chain.", + (unsigned) ctx.offnum)); + } } /* clean up */ -- 2.25.1