Re: HOT chain validation in verify_heapam()

Himanshu Upadhyaya Thu, 09 Mar 2023 07:55:34 -0800

On Wed, Mar 8, 2023 at 7:30 PM Himanshu Upadhyaya <
[email protected]> wrote:
Please find the v11 patch with all these changes.


-- 
Regards,
Himanshu Upadhyaya
EnterpriseDB: http://www.enterprisedb.com

From f2b262e95fe07dddfec994f20a6d2e76bc12b410 Mon Sep 17 00:00:00 2001
From: Himanshu Upadhyaya <[email protected]>
Date: Thu, 9 Mar 2023 21:18:58 +0530
Subject: [PATCH v11] Implement HOT chain validation in verify_heapam()

Himanshu Upadhyaya, reviewed by Robert Haas, Aleksander Alekseev, Andres Freund.
Some revisions by Robert Haas.
---
 contrib/amcheck/verify_heapam.c           | 291 +++++++++++++++++++++-
 src/bin/pg_amcheck/t/004_verify_heapam.pl | 288 ++++++++++++++++++++-
 2 files changed, 562 insertions(+), 17 deletions(-)

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index 4fcfd6df72..9cd4a795a0 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -150,7 +150,9 @@ typedef struct HeapCheckContext
 } HeapCheckContext;
 
 /* Internal implementation */
-static void check_tuple(HeapCheckContext *ctx);
+static void check_tuple(HeapCheckContext *ctx,
+						bool *xmin_commit_status_ok,
+						XidCommitStatus *xmin_commit_status);
 static void check_toast_tuple(HeapTuple toasttup, HeapCheckContext *ctx,
 							  ToastedAttribute *ta, int32 *expected_chunk_seq,
 							  uint32 extsize);
@@ -160,7 +162,9 @@ static void check_toasted_attribute(HeapCheckContext *ctx,
 									ToastedAttribute *ta);
 
 static bool check_tuple_header(HeapCheckContext *ctx);
-static bool check_tuple_visibility(HeapCheckContext *ctx);
+static bool check_tuple_visibility(HeapCheckContext *ctx,
+								   bool *xmin_commit_status_ok,
+								   XidCommitStatus *xmin_commit_status);
 
 static void report_corruption(HeapCheckContext *ctx, char *msg);
 static void report_toast_corruption(HeapCheckContext *ctx,
@@ -399,9 +403,16 @@ verify_heapam(PG_FUNCTION_ARGS)
 	for (ctx.blkno = first_block; ctx.blkno <= last_block; ctx.blkno++)
 	{
 		OffsetNumber maxoff;
+		OffsetNumber predecessor[MaxOffsetNumber];
+		OffsetNumber successor[MaxOffsetNumber];
+		bool		lp_valid[MaxOffsetNumber];
+		bool		xmin_commit_status_ok[MaxOffsetNumber];
+		XidCommitStatus	xmin_commit_status[MaxOffsetNumber];
 
 		CHECK_FOR_INTERRUPTS();
 
+		memset(predecessor, 0, sizeof(OffsetNumber) * MaxOffsetNumber);
+
 		/* Optionally skip over all-frozen or all-visible blocks */
 		if (skip_option != SKIP_PAGES_NONE)
 		{
@@ -433,6 +444,12 @@ verify_heapam(PG_FUNCTION_ARGS)
 		for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
 			 ctx.offnum = OffsetNumberNext(ctx.offnum))
 		{
+			BlockNumber	nextblkno;
+			OffsetNumber nextoffnum;
+
+			successor[ctx.offnum] = InvalidOffsetNumber;
+			lp_valid[ctx.offnum] = false;
+			xmin_commit_status_ok[ctx.offnum] = false;
 			ctx.itemid = PageGetItemId(ctx.page, ctx.offnum);
 
 			/* Skip over unused/dead line pointers */
@@ -469,6 +486,14 @@ verify_heapam(PG_FUNCTION_ARGS)
 					report_corruption(&ctx,
 									  psprintf("line pointer redirection to unused item at offset %u",
 											   (unsigned) rdoffnum));
+
+				/*
+				 * Record the fact that this line pointer has passed basic
+				 * sanity checking, and also the offset number to which it
+				 * points.
+				 */
+				lp_valid[ctx.offnum] = true;
+				successor[ctx.offnum] = rdoffnum;
 				continue;
 			}
 
@@ -502,11 +527,237 @@ verify_heapam(PG_FUNCTION_ARGS)
 			}
 
 			/* It should be safe to examine the tuple's header, at least */
+			lp_valid[ctx.offnum] = true;
 			ctx.tuphdr = (HeapTupleHeader) PageGetItem(ctx.page, ctx.itemid);
 			ctx.natts = HeapTupleHeaderGetNatts(ctx.tuphdr);
 
 			/* Ok, ready to check this next tuple */
-			check_tuple(&ctx);
+			check_tuple(&ctx,
+						&xmin_commit_status_ok[ctx.offnum],
+						&xmin_commit_status[ctx.offnum]);
+
+			/*
+			 * If the CTID field of this tuple seems to point to another tuple
+			 * on the same page, record that tuple as the successor of this
+			 * one.
+			 */
+			nextblkno = ItemPointerGetBlockNumber(&(ctx.tuphdr)->t_ctid);
+			nextoffnum = ItemPointerGetOffsetNumber(&(ctx.tuphdr)->t_ctid);
+			if (nextblkno == ctx.blkno && nextoffnum != ctx.offnum)
+				successor[ctx.offnum] = nextoffnum;
+		}
+
+		/*
+		 * Update chain validation. Check each line pointer that's got a valid
+		 * successor against that successor.
+		 */
+		ctx.attnum = -1;
+		for (ctx.offnum = FirstOffsetNumber; ctx.offnum <= maxoff;
+			 ctx.offnum = OffsetNumberNext(ctx.offnum))
+		{
+			ItemId		curr_lp;
+			ItemId		next_lp;
+			HeapTupleHeader curr_htup;
+			HeapTupleHeader next_htup;
+			TransactionId curr_xmin;
+			TransactionId curr_xmax;
+			TransactionId next_xmin;
+			OffsetNumber nextoffnum = successor[ctx.offnum];
+
+			/*
+			 * The current line pointer may not have a successor, either
+			 * because it's not valid or because it didn't point to anything.
+			 * In either case, we have to give up.
+			 *
+			 * If the current line pointer does point to something, it's
+			 * possible that the target line pointer isn't valid. We have to
+			 * give up in that case, too.
+			 */
+			if (nextoffnum == InvalidOffsetNumber || !lp_valid[nextoffnum])
+				continue;
+
+			/* We have two valid line pointers that we can examine. */
+			curr_lp = PageGetItemId(ctx.page, ctx.offnum);
+			next_lp = PageGetItemId(ctx.page, nextoffnum);
+
+			/* Handle the cases where the current line pointer is a redirect. */
+			if (ItemIdIsRedirected(curr_lp))
+			{
+				/* Can't redirect to another redirect. */
+				if (ItemIdIsRedirected(next_lp))
+				{
+					report_corruption(&ctx,
+									  psprintf("redirected line pointer points to another redirected line pointer at offset %u",
+											   (unsigned) nextoffnum));
+					continue;
+				}
+
+				/* Can only redirect to a HOT tuple. */
+				next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp);
+				if (!HeapTupleHeaderIsHeapOnly(next_htup))
+				{
+					report_corruption(&ctx,
+									  psprintf("redirected line pointer points to a non-heap-only tuple at offset %u",
+											   (unsigned) nextoffnum));
+				}
+
+				/*
+				 * Redirects are created by updates, so successor should be
+				 * the result of an update.
+				 */
+				if ((next_htup->t_infomask & HEAP_UPDATED) == 0)
+				{
+					report_corruption(&ctx,
+									  psprintf("redirected line pointer points to a non-heap-updated tuple at offset %u",
+											   (unsigned) nextoffnum));
+				}
+
+				/* HOT chains should not intersect. */
+				if (predecessor[nextoffnum] != InvalidOffsetNumber)
+				{
+					report_corruption(&ctx,
+									  psprintf("redirect line pointer points to offset %u, but offset %u also points there",
+											   (unsigned) nextoffnum, (unsigned) predecessor[nextoffnum]));
+					continue;
+				}
+
+				/*
+				 * This redirect and the tuple to which it points seem to be
+				 * part of an update chain.
+				 */
+				predecessor[nextoffnum] = ctx.offnum;
+				continue;
+			}
+
+			/*
+			 * If the next line pointer is a redirect, or if it's a tuple
+			 * but the XMAX of this tuple doesn't match the XMIN of the next
+			 * tuple, then the two aren't part of the same update chain and
+			 * there is nothing more to do.
+			 */
+			if (ItemIdIsRedirected(next_lp))
+				continue;
+			curr_htup = (HeapTupleHeader) PageGetItem(ctx.page, curr_lp);
+			curr_xmax = HeapTupleHeaderGetUpdateXid(curr_htup);
+			next_htup = (HeapTupleHeader) PageGetItem(ctx.page, next_lp);
+			next_xmin = HeapTupleHeaderGetXmin(next_htup);
+			if (!TransactionIdIsValid(curr_xmax) ||
+				!TransactionIdEquals(curr_xmax, next_xmin))
+				continue;
+
+			/* HOT chains should not intersect. */
+			if (predecessor[nextoffnum] != InvalidOffsetNumber)
+			{
+				report_corruption(&ctx,
+								  psprintf("tuple points to new version at offset %u, but offset %u also points there",
+										   (unsigned) nextoffnum, (unsigned) predecessor[nextoffnum]));
+				continue;
+			}
+
+			/*
+			 * This tuple and the tuple to which it points seem to be part
+			 * of an update chain.
+			 */
+			predecessor[nextoffnum] = ctx.offnum;
+
+			/*
+			 * If the current tuple is marked as HOT-updated, then the next
+			 * tuple should be marked as a heap-only tuple. Conversely, if the
+			 * current tuple isn't marked as HOT-updated, then the next tuple
+			 * shouldn't be marked as a heap-only tuple.
+			 */
+			if (!HeapTupleHeaderIsHotUpdated(curr_htup) &&
+				HeapTupleHeaderIsHeapOnly(next_htup))
+			{
+				report_corruption(&ctx,
+								  psprintf("non-heap-only update produced a heap-only tuple at offset %u",
+										   (unsigned) nextoffnum));
+			}
+			if (HeapTupleHeaderIsHotUpdated(curr_htup) &&
+				!HeapTupleHeaderIsHeapOnly(next_htup))
+			{
+				report_corruption(&ctx,
+								  psprintf("heap-only update produced a non-heap only tuple at offset %u",
+										   (unsigned) nextoffnum));
+			}
+
+			/*
+			 * If the current tuple's xmin is still in progress but the
+			 * successor tuple's xmin is committed, that's corruption.
+			 *
+			 * NB: We recheck the commit status of the current tuple's xmin
+			 * here, because it might have committed after we checked it and
+			 * before we checked the commit status of the successor tuple's
+			 * xmin. This should be safe because the xmin itself can't have
+			 * changed, only its commit status.
+			 */
+			curr_xmin = HeapTupleHeaderGetXmin(curr_htup);
+			if (xmin_commit_status_ok[ctx.offnum] &&
+				xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS &&
+				xmin_commit_status_ok[nextoffnum] &&
+				xmin_commit_status[nextoffnum] == XID_COMMITTED &&
+				TransactionIdIsInProgress(curr_xmin))
+			{
+				report_corruption(&ctx,
+								  psprintf("tuple with in-progress xmin %u was updated to produce a tuple at offset %u with committed xmin %u",
+										   (unsigned) curr_xmin,
+										   (unsigned) ctx.offnum,
+										   (unsigned) next_xmin));
+			}
+
+			/*
+			 * If the current tuple's xmin is aborted but the successor tuple's
+			 * xmin is in-progress or committed, that's corruption.
+			 */
+			if (xmin_commit_status_ok[ctx.offnum] &&
+				xmin_commit_status[ctx.offnum] == XID_ABORTED &&
+				xmin_commit_status_ok[nextoffnum])
+			{
+				if (xmin_commit_status[nextoffnum] == XID_IN_PROGRESS)
+					report_corruption(&ctx,
+									  psprintf("tuple with aborted xmin %u was updated to produce a tuple at offset %u with in-progress xmin %u",
+											   (unsigned) curr_xmin,
+											   (unsigned) ctx.offnum,
+											   (unsigned) next_xmin));
+				else if (xmin_commit_status[nextoffnum] == XID_COMMITTED)
+					report_corruption(&ctx,
+									  psprintf("tuple with aborted xmin %u was updated to produce a tuple at offset %u with committed xmin %u",
+											   (unsigned) curr_xmin,
+											   (unsigned) ctx.offnum,
+											   (unsigned) next_xmin));
+			}
+		}
+
+		/*
+		 * An update chain can start either with a non-heap-only tuple or with
+		 * a redirect line pointer, but not with a heap-only tuple.
+		 *
+		 * (This check is in a separate loop because we need the predecessor
+		 * array to be fully populated before we can perform it.)
+		 */
+		for (ctx.offnum = FirstOffsetNumber;
+			 ctx.offnum <= maxoff;
+			 ctx.offnum = OffsetNumberNext(ctx.offnum))
+		{
+			if (xmin_commit_status_ok[ctx.offnum] &&
+				(xmin_commit_status[ctx.offnum] == XID_COMMITTED ||
+				 xmin_commit_status[ctx.offnum] == XID_IN_PROGRESS) &&
+				predecessor[ctx.offnum] == InvalidOffsetNumber)
+			{
+				ItemId		curr_lp;
+
+				curr_lp = PageGetItemId(ctx.page, ctx.offnum);
+				if (!ItemIdIsRedirected(curr_lp))
+				{
+					HeapTupleHeader curr_htup;
+
+					curr_htup = (HeapTupleHeader)
+						PageGetItem(ctx.page, curr_lp);
+					if (HeapTupleHeaderIsHeapOnly(curr_htup))
+						report_corruption(&ctx,
+										  psprintf("tuple is root of chain but is marked as heap-only tuple"));
+				}
+			}
 		}
 
 		/* clean up */
@@ -638,6 +889,7 @@ check_tuple_header(HeapCheckContext *ctx)
 {
 	HeapTupleHeader tuphdr = ctx->tuphdr;
 	uint16		infomask = tuphdr->t_infomask;
+	TransactionId curr_xmax = HeapTupleHeaderGetUpdateXid(tuphdr);
 	bool		result = true;
 	unsigned	expected_hoff;
 
@@ -663,6 +915,19 @@ check_tuple_header(HeapCheckContext *ctx)
 		 */
 	}
 
+	if (!TransactionIdIsValid(curr_xmax) &&
+		HeapTupleHeaderIsHotUpdated(tuphdr))
+	{
+		report_corruption(ctx,
+						  psprintf("tuple has been HOT updated, but xmax is 0"));
+
+		/*
+		 * As above, even though this shouldn't happen, it's not sufficient
+		 * justification for skipping further checks, we should still be able
+		 * to perform sensibly.
+		 */
+	}
+
 	if (infomask & HEAP_HASNULL)
 		expected_hoff = MAXALIGN(SizeofHeapTupleHeader + BITMAPLEN(ctx->natts));
 	else
@@ -718,9 +983,14 @@ check_tuple_header(HeapCheckContext *ctx)
  * Returns true if the tuple itself should be checked, false otherwise.  Sets
  * ctx->tuple_could_be_pruned if the tuple -- and thus also any associated
  * TOAST tuples -- are eligible for pruning.
+ *
+ * Sets *xmin_commit_status_ok to true if the commit status of xmin is known
+ * and false otherwise. If it's set to true, then also set *xmin_commit_status
+ * to the actual commit status.
  */
 static bool
-check_tuple_visibility(HeapCheckContext *ctx)
+check_tuple_visibility(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
+					   XidCommitStatus *xmin_commit_status)
 {
 	TransactionId xmin;
 	TransactionId xvac;
@@ -731,13 +1001,17 @@ check_tuple_visibility(HeapCheckContext *ctx)
 	HeapTupleHeader tuphdr = ctx->tuphdr;
 
 	ctx->tuple_could_be_pruned = true;	/* have not yet proven otherwise */
+	*xmin_commit_status_ok = false;		/* have not yet proven otherwise */
 
 	/* If xmin is normal, it should be within valid range */
 	xmin = HeapTupleHeaderGetXmin(tuphdr);
 	switch (get_xid_status(xmin, ctx, &xmin_status))
 	{
 		case XID_INVALID:
+			break;
 		case XID_BOUNDS_OK:
+			*xmin_commit_status_ok = true;
+			*xmin_commit_status = xmin_status;
 			break;
 		case XID_IN_FUTURE:
 			report_corruption(ctx,
@@ -1515,9 +1789,13 @@ check_toasted_attribute(HeapCheckContext *ctx, ToastedAttribute *ta)
 /*
  * Check the current tuple as tracked in ctx, recording any corruption found in
  * ctx->tupstore.
+ *
+ * We return some information about the status of xmin to aid in validating
+ * update chains.
  */
 static void
-check_tuple(HeapCheckContext *ctx)
+check_tuple(HeapCheckContext *ctx, bool *xmin_commit_status_ok,
+			XidCommitStatus *xmin_commit_status)
 {
 	/*
 	 * Check various forms of tuple header corruption, and if the header is
@@ -1531,7 +1809,8 @@ check_tuple(HeapCheckContext *ctx)
 	 * cannot assume our relation description matches the tuple structure, and
 	 * therefore cannot check it.
 	 */
-	if (!check_tuple_visibility(ctx))
+	if (!check_tuple_visibility(ctx, xmin_commit_status_ok,
+							xmin_commit_status))
 		return;
 
 	/*
diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl
index 215c30eaa8..7e6791ea2c 100644
--- a/src/bin/pg_amcheck/t/004_verify_heapam.pl
+++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl
@@ -174,12 +174,16 @@ sub write_tuple
 # Set umask so test directories and files are created with default permissions
 umask(0077);
 
+my $pred_xmax;
+my $pred_posid;
+my $aborted_xid;
 # Set up the node.  Once we create and corrupt the table,
 # autovacuum workers visiting the table could crash the backend.
 # Disable autovacuum so that won't happen.
 my $node = PostgreSQL::Test::Cluster->new('test');
 $node->init;
 $node->append_conf('postgresql.conf', 'autovacuum=off');
+$node->append_conf('postgresql.conf','max_prepared_transactions=10');
 
 # Start the node and load the extensions.  We depend on both
 # amcheck and pageinspect for this test.
@@ -217,7 +221,9 @@ my $rel = $node->safe_psql('postgres',
 my $relpath = "$pgdata/$rel";
 
 # Insert data and freeze public.test
-use constant ROWCOUNT => 16;
+use constant ROWCOUNT => 43 ; # Total row count in this page.
+use constant ROWCOUNT_HOTCHAIN => 27; # Row count related to test of HOT chains validations and redirected LP.
+# First insert data needed for non-HOT chain validation.
 $node->safe_psql(
 	'postgres', qq(
 	INSERT INTO public.test (a, b, c)
@@ -227,7 +233,74 @@ $node->safe_psql(
 			repeat('w', 10000)
 		);
 	VACUUM FREEZE public.test
-	)) for (1 .. ROWCOUNT);
+	)) for (1 .. ROWCOUNT-ROWCOUNT_HOTCHAIN);
+
+# Data for Redirected LP.
+$node->safe_psql(
+	'postgres', qq(
+		INSERT INTO public.test (a, b, c)
+			VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg', generate_series(1,2));
+		UPDATE public.test SET c = 'a' WHERE c = '1';
+		UPDATE public.test SET c = 'a' WHERE c = '2';
+		INSERT INTO public.test (a, b, c)
+			VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg', generate_series(3,6));
+		UPDATE public.test SET c = 'a' WHERE c = '3';
+		UPDATE public.test SET c = 'a' WHERE c = '4';
+	));
+
+# Negative test case of HOT-pruning with aborted tuple.
+$node->safe_psql(
+        'postgres', qq(
+                BEGIN;
+                        UPDATE public.test SET c = 'a' WHERE c = '5';
+                ABORT;
+		VACUUM FREEZE public.test;
+        ));
+# The next update of any tuple will now be stored in the same place as the tuple inserted by the aborted transaction.
+# This should not be reported as corruption.
+$node->safe_psql(
+        'postgres', qq(
+                        UPDATE public.test SET c = 'a' WHERE c = '6';
+                VACUUM FREEZE public.test;
+        ));
+
+
+# Data for HOT chains validation, so not calling VACUUM FREEZE.
+$node->safe_psql(
+	'postgres', qq(
+		INSERT INTO public.test (a, b, c)
+			VALUES ( x'DEADF9F9DEADF9F9'::bigint, 'abcdefg', generate_series(7,15));
+		UPDATE public.test SET c = 'a' WHERE c = '7';
+		UPDATE public.test SET c = 'a' WHERE c = '10';
+		UPDATE public.test SET c = 'a' WHERE c = '11';
+		UPDATE public.test SET c = 'a' WHERE c = '12';
+		UPDATE public.test SET c = 'a' WHERE c = '13';
+		UPDATE public.test SET c = 'a' WHERE c = '14';
+		UPDATE public.test SET c = 'a' WHERE c = '15';
+	));
+
+# Need one aborted transaction to test corruption in HOT chains.
+$node->safe_psql(
+	'postgres', qq(
+		BEGIN;
+			UPDATE public.test SET c = 'a' WHERE c = '9';
+		ABORT;
+	));
+
+# Need one in-progress transaction to test a few kinds of corruption in HOT
+# chains. We use PREPARE TRANSACTION here because a prepared transaction is
+# not aborted even if we stop the node.
+$node->safe_psql(
+	'postgres', qq(
+		BEGIN;
+			PREPARE TRANSACTION 'in_progress_tx';
+	));
+my $in_progress_xid = $node->safe_psql(
+				'postgres', qq(
+					SELECT transaction FROM pg_prepared_xacts;
+				));
+
+
 
 my $relfrozenxid = $node->safe_psql('postgres',
 	q(select relfrozenxid from pg_class where relname = 'test'));
@@ -249,12 +322,21 @@ if ($datfrozenxid <= 3 || $datfrozenxid >= $relfrozenxid)
 my @lp_off;
 for my $tup (0 .. ROWCOUNT - 1)
 {
-	push(
-		@lp_off,
-		$node->safe_psql(
-			'postgres', qq(
-select lp_off from heap_page_items(get_raw_page('test', 'main', 0))
-	offset $tup limit 1)));
+	my $islpredirected = $node->safe_psql('postgres',
+		qq(select lp_flags from heap_page_items(get_raw_page('test', 'main', 0)) offset $tup limit 1));
+	if ($islpredirected != 2)
+	{
+		push(
+			@lp_off,
+			$node->safe_psql(
+				'postgres', qq(
+			select lp_off from heap_page_items(get_raw_page('test', 'main', 0))
+				offset $tup limit 1)));
+	}
+	else
+	{
+		push(@lp_off, (-1));
+	}
 }
 
 # Sanity check that our 'test' table on disk layout matches expectations.  If
@@ -271,6 +353,10 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
 {
 	my $offnum = $tupidx + 1;        # offnum is 1-based, not zero-based
 	my $offset = $lp_off[$tupidx];
+	if ($offset == -1)
+	{
+		next;
+	}
 	my $tup = read_tuple($file, $offset);
 
 	# Sanity-check that the data appears on the page where we expect.
@@ -283,7 +369,7 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
 		$node->clean_node;
 		plan skip_all =>
 		  sprintf(
-			"Page layout differs from our expectations: expected (%x, %x, \"%s\"), got (%x, %x, \"%s\")",
+			"Page layout of index %d differs from our expectations: expected (%x, %x, \"%s\"), got (%x, %x, \"%s\")", $tupidx,
 			0xDEADF9F9, 0xDEADF9F9, "abcdefg", $a_1, $a_2, $b);
 		exit;
 	}
@@ -318,6 +404,9 @@ use constant HEAP_XMAX_INVALID   => 0x0800;
 use constant HEAP_NATTS_MASK     => 0x07FF;
 use constant HEAP_XMAX_IS_MULTI  => 0x1000;
 use constant HEAP_KEYS_UPDATED   => 0x2000;
+use constant HEAP_HOT_UPDATED    => 0x4000;
+use constant HEAP_ONLY_TUPLE     => 0x8000;
+use constant HEAP_UPDATED        => 0x2000;
 
 # Helper function to generate a regular expression matching the header we
 # expect verify_heapam() to return given which fields we expect to be non-null.
@@ -349,9 +438,77 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
 {
 	my $offnum = $tupidx + 1;        # offnum is 1-based, not zero-based
 	my $offset = $lp_off[$tupidx];
+	my $header = header(0, $offnum, undef);
+	# An offset of -1 means this is a redirected line pointer.
+	if ($offset == -1)
+	{	# at offnum 19 we will unset HEAP_ONLY_TUPLE and HEAP_UPDATED flags.
+		if ($offnum == 17)
+		{
+			push @expected,
+			  qr/${header}redirected line pointer points to a non-heap-only tuple at offset \d+/;
+			push @expected,
+			  qr/${header}redirected line pointer points to a non-heap-updated tuple at offset \d+/;
+		}
+		elsif ($offnum == 18)
+		{
+			# we re-set lp offset to 17, we need to rewrite the 4 bytes values so that line pointer will be
+			# lp.off = 17, lp_flags = 2, lp_len = 0.
+			if ($ENDIANNESS eq 'little')
+			{
+				sysseek($file, 92, 0)
+				  or BAIL_OUT("sysseek failed: $!");
+				syswrite(
+					$file,
+					pack("L",
+						0x00010011)
+				) or BAIL_OUT("syswrite failed: $!");
+			}
+			else
+			{
+				sysseek($file, 92, 0)
+				  or BAIL_OUT("sysseek failed: $!");
+				syswrite(
+					$file,
+					pack("L",
+						0x11000100)
+				) or BAIL_OUT("syswrite failed: $!");
+
+			}
+			push @expected,
+			  qr/${header}redirected line pointer points to another redirected line pointer at offset \d+/;
+		}
+		elsif ($offnum == 22)
+		{
+			# we re-set lp offset to 25, we need to rewrite the 4 bytes values so that line pointer will be
+			# lp.off = 25, lp_flags = 2, lp_len = 0.
+			if ($ENDIANNESS eq 'little')
+			{
+				sysseek($file, 108, 0)
+				  or BAIL_OUT("sysseek failed: $!");
+				syswrite(
+					$file,
+					pack("L",
+						0x00010019)
+				) or BAIL_OUT("syswrite failed: $!");
+			}
+			else
+			{
+				sysseek($file, 108, 0)
+				  or BAIL_OUT("sysseek failed: $!");
+				syswrite(
+					$file,
+					pack("L",
+						0x19000100)
+				) or BAIL_OUT("syswrite failed: $!");
+
+			}
+			push @expected,
+			  qr/${header}redirect line pointer points to offset \d+, but offset \d+ also points there/;
+		}
+		next;
+	}
 	my $tup = read_tuple($file, $offset);
 
-	my $header = header(0, $offnum, undef);
 	if ($offnum == 1)
 	{
 		# Corruptly set xmin < relfrozenxid
@@ -502,7 +659,7 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
 		push @expected,
 		  qr/${header}multitransaction ID 4 equals or exceeds next valid multitransaction ID 1/;
 	}
-	elsif ($offnum == 15)    # Last offnum must equal ROWCOUNT
+	elsif ($offnum == 15)
 	{
 		# Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI
 		$tup->{t_infomask} |= HEAP_XMAX_COMMITTED;
@@ -512,6 +669,111 @@ for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
 		push @expected,
 		  qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/;
 	}
+	# Test for redirected line pointer.
+	# Offnums 17 and 18 are redirected line pointers, so they don't need any
+	# tuple validation.
+	elsif ($offnum == 19)
+	{
+		# unset HEAP_ONLY_TUPLE and HEAP_UPDATED flag.
+		$tup->{t_infomask2} &= ~HEAP_ONLY_TUPLE;
+		$tup->{t_infomask} &= ~HEAP_UPDATED;
+	}
+	# offnum 18 is redirected lp and is redirected to offset 20,
+	# We have corrupted it to route its lp.off to point it to line pointer at
+	# offset 17.
+
+	# Test related to HOT chains.
+	elsif ($offnum == 28)
+	{
+		# Unset HEAP_HOT_UPDATED.
+		$tup->{t_infomask2} &= ~HEAP_HOT_UPDATED;
+		$pred_xmax = $tup->{t_xmax}; # to be used for tuple at offnum 29.
+		$pred_posid = $tup->{ip_posid}; # to be used for tuple at offnum 29.
+		push @expected,
+		  qr/${header}non-heap-only update produced a heap-only tuple at offset \d+/;
+	}
+	elsif ($offnum == 29)
+	{
+		# Set ip_posid and t_xmax from ip_posid and t_xmax of tuple at offnum 28.
+		$tup->{t_xmax} = $pred_xmax;
+		$tup->{ip_posid} = $pred_posid;
+		push @expected,
+		  qr/${header}tuple points to new version at offset \d+, but offset \d+ also points there/;
+	}
+	elsif ($offnum == 30)
+	{
+		# Get aborted xid, that is needed to test corruption at offnum 31.
+		$aborted_xid = $tup->{t_xmax};
+	}
+	elsif ($offnum == 31)
+	{
+		# Set xmin to aborted xid.
+		$tup->{t_xmin} = $aborted_xid;
+		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+		push @expected,
+		  qr/${header}tuple with aborted xmin \d+ was updated to produce a tuple at offset \d+ with committed xmin \d+/;
+	}
+	elsif ($offnum == 32)
+	{
+		# Raised corruption as root of HOT chain can't be HEAP_ONLY_TUPLE.
+		# set HEAP_ONLY_TUPLE.
+		$tup->{t_infomask2} |= HEAP_ONLY_TUPLE;
+		push @expected,
+		  qr/${header}tuple is root of chain but is marked as heap-only tuple/;
+	}
+	elsif ($offnum == 33)
+	{
+		# The updated version of this tuple, at offnum 40, is corrupted.
+		push @expected,
+		  qr/${header}heap-only update produced a non-heap only tuple at offset \d+/;
+	}
+	elsif ($offnum == 34)
+	{
+		# set xmax to invalid transaction id.
+		$tup->{t_xmax} = 0;
+		push @expected,
+		  qr/${header}tuple has been HOT updated, but xmax is 0/;
+	}
+	elsif ($offnum == 35)
+	{
+		# Set xmin to the in-progress transaction id.
+		$tup->{t_xmin} = $in_progress_xid;
+		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+		push @expected,
+		  qr/${header}tuple with in-progress xmin \d+ was updated to produce a tuple at offset \d+ with committed xmin \d+/;
+	}
+	elsif ($offnum == 36)
+	{
+		# Set xmin to the aborted xid and xmax to the in-progress xid.
+		$tup->{t_xmin} = $aborted_xid;
+		$tup->{t_xmax} = $in_progress_xid;
+		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+		push @expected,
+		  qr/${header}tuple with aborted xmin \d+ was updated to produce a tuple at offset \d+ with in-progress xmin \d+/;
+	}
+	# Tuple at offnum 37 is an update of tuple at offnum 28.
+
+	# Tuple at offnum 38 is an update of tuple at offnum 31.
+
+	# Tuple at offnum 39 is an update of tuple at offnum 32.
+
+	elsif($offnum == 40)
+	{
+		# Unset HEAP_ONLY_TUPLE; corruption will be reported for the tuple at offnum 33.
+		$tup->{t_infomask2} &= ~HEAP_ONLY_TUPLE;
+	}
+	# Tuple at offnum 41 is an update of corrupted tuple at offnum 34.
+	# Tuple at offnum 42 is an update of corrupted tuple at offnum 35.
+	# Tuple at offnum 43 is an update of tuple at offnum 36.
+	elsif ($offnum == 43)
+	{
+		# Set xmin to the in-progress transaction id.
+		$tup->{t_xmin} = $in_progress_xid;
+		$tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+	}
+	# Tuple at offnum 44 is an update of tuple at offnum 30.
+	# offset 44 is an updated tuple of tuple at offset #30 and was updated by an aborted transaction.
+	# this is needed to have aborted transaction xid to test corruption related to aborted transaction at offset #36.
 	write_tuple($file, $offset, $tup);
 }
 close($file)
@@ -523,6 +785,10 @@ $node->start;
 $node->command_checks_all(
 	[ 'pg_amcheck', '--no-dependent-indexes', '-p', $port, 'postgres' ],
 	2, [@expected], [], 'Expected corruption message output');
+$node->safe_psql(
+        'postgres', qq(
+                        COMMIT PREPARED 'in_progress_tx';
+        ));
 
 $node->teardown_node;
 $node->clean_node;
-- 
2.25.1

Re: HOT chain validation in verify_heapam()

Reply via email to