From 48a11fb4436b58f25914b6726b589e6b34bc083b Mon Sep 17 00:00:00 2001
From: Mark Dilger <mark.dilger@enterprisedb.com>
Date: Sun, 3 Oct 2021 09:52:22 -0700
Subject: [PATCH v3] Fix BUG #17212

Stop attempting to check inappropriate relations from pg_amcheck.
Specifically, temporary tables and indexes belonging to other
sessions, and indexes marked invalid or not ready.  The old behavior
printed an error message for each of these and ultimately exited
with non-zero status.  The tool did not exit early, nor fail to
check relations which were checkable, but users found the error
messages and exit status unfriendly.

While at it, change amcheck's verify_heapam function to skip
unlogged tables with a debug message when run against a server in
recovery.  This solves the problem of pg_amcheck failing against
such servers when they happen to contain unlogged relations.  We fix
it here rather than in pg_amcheck because non-pg_amcheck users will
presumably want the same behavior.

Change verify_nbtree to issue DEBUG1 rather than NOTICE for unlogged
indexes on servers in recovery.  This is the same issue as for
verify_heapam, except that verify_nbtree was already handling this
issue, just with a higher ereport level.
---
 contrib/amcheck/verify_heapam.c         |  15 ++
 contrib/amcheck/verify_nbtree.c         |   2 +-
 doc/src/sgml/ref/pg_amcheck.sgml        |   7 +
 src/bin/pg_amcheck/pg_amcheck.c         |  83 ++++++---
 src/bin/pg_amcheck/t/006_bad_targets.pl | 233 ++++++++++++++++++++++++
 5 files changed, 310 insertions(+), 30 deletions(-)
 create mode 100644 src/bin/pg_amcheck/t/006_bad_targets.pl

diff --git a/contrib/amcheck/verify_heapam.c b/contrib/amcheck/verify_heapam.c
index 91ef09a8ca..0376cf6480 100644
--- a/contrib/amcheck/verify_heapam.c
+++ b/contrib/amcheck/verify_heapam.c
@@ -323,6 +323,21 @@ verify_heapam(PG_FUNCTION_ARGS)
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 				 errmsg("only heap AM is supported")));
 
+	/*
+	 * Early exit for unlogged relations during recovery.  These will have no
+	 * relation fork, so there won't be anything to check.
+	 */
+	if (ctx.rel->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
+		RecoveryInProgress())
+	{
+		ereport(DEBUG1,
+				(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
+				 errmsg("cannot verify unlogged relation \"%s\" during recovery, skipping",
+						RelationGetRelationName(ctx.rel))));
+		relation_close(ctx.rel, AccessShareLock);
+		PG_RETURN_NULL();
+	}
+
 	/* Early exit if the relation is empty */
 	nblocks = RelationGetNumberOfBlocks(ctx.rel);
 	if (!nblocks)
diff --git a/contrib/amcheck/verify_nbtree.c b/contrib/amcheck/verify_nbtree.c
index d19f73127c..35a5c3ae98 100644
--- a/contrib/amcheck/verify_nbtree.c
+++ b/contrib/amcheck/verify_nbtree.c
@@ -383,7 +383,7 @@ btree_index_mainfork_expected(Relation rel)
 		!RecoveryInProgress())
 		return true;
 
-	ereport(NOTICE,
+	ereport(DEBUG1,
 			(errcode(ERRCODE_READ_ONLY_SQL_TRANSACTION),
 			 errmsg("cannot verify unlogged index \"%s\" during recovery, skipping",
 					RelationGetRelationName(rel))));
diff --git a/doc/src/sgml/ref/pg_amcheck.sgml b/doc/src/sgml/ref/pg_amcheck.sgml
index 1fd0ecd911..54ec14ca37 100644
--- a/doc/src/sgml/ref/pg_amcheck.sgml
+++ b/doc/src/sgml/ref/pg_amcheck.sgml
@@ -435,6 +435,13 @@ PostgreSQL documentation
    </variablelist>
   </para>
 
+  <warning>
+   <para>
+    The <option>--parent-check</option> and <option>--rootdescend</option>
+    options will fail when executed against a server in Hot Standby mode.
+   </para>
+  </warning>
+
   <para>
    The following command-line options control the connection to the server:
 
diff --git a/src/bin/pg_amcheck/pg_amcheck.c b/src/bin/pg_amcheck/pg_amcheck.c
index ec04b977de..d4a53c8e63 100644
--- a/src/bin/pg_amcheck/pg_amcheck.c
+++ b/src/bin/pg_amcheck/pg_amcheck.c
@@ -816,6 +816,9 @@ main(int argc, char *argv[])
  * names matching the expectations of verify_heap_slot_handler, which will
  * receive and handle each row returned from the verify_heapam() function.
  *
+ * The constructed SQL command will silently skip temporary tables, as checking
+ * them would needlessly draw errors from the underlying amcheck function.
+ *
  * sql: buffer into which the heap table checking command will be written
  * rel: relation information for the heap table to be checked
  * conn: the connection to be used, for string escaping purposes
@@ -825,10 +828,10 @@ prepare_heap_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
 {
 	resetPQExpBuffer(sql);
 	appendPQExpBuffer(sql,
-					  "SELECT blkno, offnum, attnum, msg FROM %s.verify_heapam("
-					  "\nrelation := %u, on_error_stop := %s, check_toast := %s, skip := '%s'",
+					  "SELECT v.blkno, v.offnum, v.attnum, v.msg "
+					  "FROM pg_catalog.pg_class c, %s.verify_heapam("
+					  "\nrelation := c.oid, on_error_stop := %s, check_toast := %s, skip := '%s'",
 					  rel->datinfo->amcheck_schema,
-					  rel->reloid,
 					  opts.on_error_stop ? "true" : "false",
 					  opts.reconcile_toast ? "true" : "false",
 					  opts.skip);
@@ -838,7 +841,10 @@ prepare_heap_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
 	if (opts.endblock >= 0)
 		appendPQExpBuffer(sql, ", endblock := " INT64_FORMAT, opts.endblock);
 
-	appendPQExpBufferChar(sql, ')');
+	appendPQExpBuffer(sql,
+					  "\n) v WHERE c.oid = %u "
+					  "AND c.relpersistence != 't'",
+					  rel->reloid);
 }
 
 /*
@@ -849,6 +855,10 @@ prepare_heap_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
  * functions do not return any, but rather return corruption information by
  * raising errors, which verify_btree_slot_handler expects.
  *
+ * The constructed SQL command will silently skip temporary indexes, and
+ * indexes being reindexed concurrently, as checking them would needlessly draw
+ * errors from the underlying amcheck functions.
+ *
  * sql: buffer into which the heap table checking command will be written
  * rel: relation information for the index to be checked
  * conn: the connection to be used, for string escaping purposes
@@ -858,27 +868,31 @@ prepare_btree_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
 {
 	resetPQExpBuffer(sql);
 
-	/*
-	 * Embed the database, schema, and relation name in the query, so if the
-	 * check throws an error, the user knows which relation the error came
-	 * from.
-	 */
 	if (opts.parent_check)
 		appendPQExpBuffer(sql,
-						  "SELECT * FROM %s.bt_index_parent_check("
-						  "index := '%u'::regclass, heapallindexed := %s, "
-						  "rootdescend := %s)",
+						  "SELECT %s.bt_index_parent_check("
+						  "index := c.oid, heapallindexed := %s, rootdescend := %s)"
+						  "\nFROM pg_catalog.pg_class c, pg_catalog.pg_index i "
+						  "WHERE c.oid = %u "
+						  "AND c.oid = i.indexrelid "
+						  "AND c.relpersistence != 't' "
+						  "AND i.indisready AND i.indisvalid AND i.indislive",
 						  rel->datinfo->amcheck_schema,
-						  rel->reloid,
 						  (opts.heapallindexed ? "true" : "false"),
-						  (opts.rootdescend ? "true" : "false"));
+						  (opts.rootdescend ? "true" : "false"),
+						  rel->reloid);
 	else
 		appendPQExpBuffer(sql,
-						  "SELECT * FROM %s.bt_index_check("
-						  "index := '%u'::regclass, heapallindexed := %s)",
+						  "SELECT %s.bt_index_check("
+						  "index := c.oid, heapallindexed := %s)"
+						  "\nFROM pg_catalog.pg_class c, pg_catalog.pg_index i "
+						  "WHERE c.oid = %u "
+						  "AND c.oid = i.indexrelid "
+						  "AND c.relpersistence != 't' "
+						  "AND i.indisready AND i.indisvalid AND i.indislive",
 						  rel->datinfo->amcheck_schema,
-						  rel->reloid,
-						  (opts.heapallindexed ? "true" : "false"));
+						  (opts.heapallindexed ? "true" : "false"),
+						  rel->reloid);
 }
 
 /*
@@ -1086,15 +1100,17 @@ verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context)
 
 	if (PQresultStatus(res) == PGRES_TUPLES_OK)
 	{
-		int			ntups = PQntuples(res);
+		int                     ntups = PQntuples(res);
 
-		if (ntups != 1)
+		if (ntups > 1)
 		{
 			/*
 			 * We expect the btree checking functions to return one void row
-			 * each, so we should output some sort of warning if we get
-			 * anything else, not because it indicates corruption, but because
-			 * it suggests a mismatch between amcheck and pg_amcheck versions.
+			 * each, or zero rows if the check was skipped due to the object
+			 * being in the wrong state to be checked, so we should output some
+			 * sort of warning if we get anything more, not because it
+			 * indicates corruption, but because it suggests a mismatch between
+			 * amcheck and pg_amcheck versions.
 			 *
 			 * In conjunction with --progress, anything written to stderr at
 			 * this time would present strangely to the user without an extra
@@ -1889,10 +1905,16 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
 						  "\nAND (c.relam = %u OR NOT ep.btree_only OR ep.rel_regex IS NULL)",
 						  HEAP_TABLE_AM_OID, BTREE_AM_OID);
 
+	/*
+	 * Exclude temporary tables and indexes, which must necessarily belong to
+	 * other sessions.  (We don't create any ourselves.)  We must ultimately
+	 * exclude indexes marked invalid or not ready, but we delay that decision
+	 * until firing off the amcheck command, as the state of an index may
+	 * change by then.
+	 */
+	appendPQExpBufferStr(&sql, "\nWHERE c.relpersistence != 't'");
 	if (opts.excludetbl || opts.excludeidx || opts.excludensp)
-		appendPQExpBufferStr(&sql, "\nWHERE ep.pattern_id IS NULL");
-	else
-		appendPQExpBufferStr(&sql, "\nWHERE true");
+		appendPQExpBufferStr(&sql, "\nAND ep.pattern_id IS NULL");
 
 	/*
 	 * We need to be careful not to break the --no-dependent-toast and
@@ -1944,7 +1966,8 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
 								 "\nON ('pg_toast' ~ ep.nsp_regex OR ep.nsp_regex IS NULL)"
 								 "\nAND (t.relname ~ ep.rel_regex OR ep.rel_regex IS NULL)"
 								 "\nAND ep.heap_only"
-								 "\nWHERE ep.pattern_id IS NULL");
+								 "\nWHERE ep.pattern_id IS NULL"
+								 "\nAND t.relpersistence != 't'");
 		appendPQExpBufferStr(&sql,
 							 "\n)");
 	}
@@ -1962,7 +1985,8 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
 						  "\nINNER JOIN pg_catalog.pg_index i "
 						  "ON r.oid = i.indrelid "
 						  "INNER JOIN pg_catalog.pg_class c "
-						  "ON i.indexrelid = c.oid");
+						  "ON i.indexrelid = c.oid "
+						  "AND c.relpersistence != 't'");
 		if (opts.excludeidx || opts.excludensp)
 			appendPQExpBufferStr(&sql,
 								 "\nINNER JOIN pg_catalog.pg_namespace n "
@@ -2000,7 +2024,8 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
 						  "INNER JOIN pg_catalog.pg_index i "
 						  "ON t.oid = i.indrelid"
 						  "\nINNER JOIN pg_catalog.pg_class c "
-						  "ON i.indexrelid = c.oid");
+						  "ON i.indexrelid = c.oid "
+						  "AND c.relpersistence != 't'");
 		if (opts.excludeidx)
 			appendPQExpBufferStr(&sql,
 								 "\nLEFT OUTER JOIN exclude_pat ep "
diff --git a/src/bin/pg_amcheck/t/006_bad_targets.pl b/src/bin/pg_amcheck/t/006_bad_targets.pl
new file mode 100644
index 0000000000..e89d7d1be8
--- /dev/null
+++ b/src/bin/pg_amcheck/t/006_bad_targets.pl
@@ -0,0 +1,233 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# This regression test checks the behavior of pg_amcheck in the presence of
+# inappropriate target relations.
+#
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 52;
+
+# Establish a primary and standby server, with temporary and unlogged tables.
+# The temporary tables should not be checked on either system, as pg_amcheck's
+# sessions won't be able to see their contents.  The unlogged tables should not
+# be checked on the standby, as they won't have relation forks there.
+#
+my $node_primary = PostgresNode->new('primary');
+$node_primary->init(
+	allows_streaming => 1,
+	auth_extra       => [ '--create-role', 'repl_role' ]);
+$node_primary->start;
+$node_primary->safe_psql('postgres', qq(
+CREATE EXTENSION amcheck;
+CREATE UNLOGGED TABLE unlogged_heap (i INTEGER[], b BOX);
+INSERT INTO unlogged_heap (i, b) VALUES (ARRAY[1,2,3]::INTEGER[], '((1,2),(3,4))'::BOX);
+CREATE INDEX unlogged_btree ON unlogged_heap USING BTREE (i);
+CREATE INDEX unlogged_brin ON unlogged_heap USING BRIN (b);
+CREATE INDEX unlogged_gin ON unlogged_heap USING GIN (i);
+CREATE INDEX unlogged_gist ON unlogged_heap USING GIST (b);
+CREATE INDEX unlogged_hash ON unlogged_heap USING HASH (i);
+));
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Hold open a session with temporary tables and indexes defined
+#
+my $in = '';
+my $out = '';
+my $timer = IPC::Run::timeout(180);
+my $h = $node_primary->background_psql('postgres', \$in, \$out, $timer);
+$in = qq(
+BEGIN;
+CREATE TEMPORARY TABLE heap_temporary (i INTEGER[], b BOX) ON COMMIT PRESERVE ROWS;
+INSERT INTO heap_temporary (i, b) VALUES (ARRAY[1,2,3]::INTEGER[], '((1,2),(3,4))'::BOX);
+CREATE INDEX btree_temporary ON heap_temporary USING BTREE (i);
+CREATE INDEX brin_temporary ON heap_temporary USING BRIN (b);
+CREATE INDEX gin_temporary ON heap_temporary USING GIN (i);
+CREATE INDEX gist_temporary ON heap_temporary USING GIST (b);
+CREATE INDEX hash_temporary ON heap_temporary USING HASH (i);
+COMMIT;
+BEGIN;
+);
+$h->pump_nb;
+
+my $node_standby = PostgresNode->new('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+	has_streaming => 1);
+$node_standby->start;
+$node_primary->wait_for_catchup($node_standby, 'replay',
+	$node_primary->lsn('replay'));
+
+# Check that running amcheck functions from SQL against inappropriate targets
+# fails sensibly.  This portion of the test arguably belongs in contrib/amcheck
+# rather than here, but we have already set up the necessary test environment,
+# so check here rather than duplicating the environment there.
+#
+my ($stdout, $stderr);
+for my $test (
+	[ $node_standby => "SELECT * FROM verify_heapam('unlogged_heap')",
+	  qr/^$/,
+	 "checking unlogged table during recovery does not complain" ],
+
+	[ $node_standby => "SELECT * FROM bt_index_check('unlogged_btree')",
+	  qr/^$/,
+	 "checking unlogged btree index during recovery does not complain" ],
+
+	[ $node_standby => "SELECT * FROM bt_index_check('unlogged_brin')",
+	  qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_brin" is not a B-Tree index/s,
+	 "checking unlogged brin index during recovery fails appropriately" ],
+
+	[ $node_standby => "SELECT * FROM bt_index_check('unlogged_gin')",
+	  qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_gin" is not a B-Tree index/s,
+	 "checking unlogged gin index during recovery fails appropriately" ],
+
+	[ $node_standby => "SELECT * FROM bt_index_check('unlogged_gist')",
+	  qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_gist" is not a B-Tree index/s,
+	 "checking unlogged gist index during recovery fails appropriately" ],
+
+	[ $node_standby => "SELECT * FROM bt_index_check('unlogged_hash')",
+	  qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_hash" is not a B-Tree index/s,
+	 "checking unlogged hash index during recovery fails appropriately" ],
+
+	)
+{
+	$test->[0]->psql('postgres', $test->[1],
+					 stdout => \$stdout, stderr => \$stderr);
+	like ($stderr, $test->[2], $test->[3]);
+}
+
+# Verify that --all excludes the temporary relations and handles unlogged
+# relations as appropriate, without raising any warnings or exiting with a
+# non-zero status.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck all objects on primary');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck all objects on standby');
+
+# Verify that explicitly asking for unlogged relations to be checked does not
+# raise any warnings or exit with a non-zero exit status, even when they cannot
+# be checked due to recovery being in progress.
+#
+# These relations will have no relation fork during recovery, so even without
+# checking them, we can say (by definition) that they are not corrupt, because
+# it is meaningless to declare a non-existent relation fork corrupt.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--relation', '*unlogged*'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck unlogged objects on primary');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--relation', '*unlogged*'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck unlogged objects on standby');
+
+# Verify that the --heapallindexed check works on both primary and standby.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--heapallindexed'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck --helpallindexed on primary');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--heapallindexed'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck --helpallindexed on standby');
+
+# Verify that the --parent-check and --rootdescend options work on the primary.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--rootdescend'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck --rootdescend on primary');
+
+$node_primary->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--parent-check'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck --parent-check on primary');
+
+# Check that the failures on the standby from using --parent-check and
+# --rootdescend are the failures we expect
+#
+$node_standby->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--rootdescend'],
+	2,
+	[ qr/ERROR:  cannot acquire lock mode ShareLock on database objects while recovery is in progress/,
+	  qr/btree index "postgres\.pg_catalog\./,
+	  qr/btree index "postgres\.pg_toast\./,
+	],
+	[ qr/^$/ ],
+	'pg_amcheck --rootdescend on standby');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--parent-check'],
+	2,
+	[ qr/ERROR:  cannot acquire lock mode ShareLock on database objects while recovery is in progress/,
+	  qr/btree index "postgres\.pg_catalog\./,
+	  qr/btree index "postgres\.pg_toast\./,
+	],
+	[ qr/^$/ ],
+	'pg_amcheck --parent-check on standby');
+
+# Bug #17212
+#
+# Verify that explicitly asking for another session's temporary relations to be
+# checked fails, but only in the sense that nothing matched the parameter.  We
+# don't complain that they are uncheckable, only that you gave a --relation
+# option and we didn't find anything checkable matching the pattern.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--relation', '*temporary*'],
+	1,
+	[ qr/^$/ ],
+	[ qr/error: no relations to check matching "\*temporary\*"/ ],
+	'pg_amcheck temporary objects on primary');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--relation', '*temporary*'],
+	1,
+	[ qr/^$/ ],
+	[ qr/error: no relations to check matching "\*temporary\*"/ ],
+	'pg_amcheck temporary objects on standby');
+
+# Verify that a relation pattern which only matches temporary relations draws
+# an error, even when other relation patterns are ok.  This differs from the
+# test above in that the set of all relations to check is not empty.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--relation', '*temporary*', '--relation', '*unlogged*'],
+	1,
+	[ qr/^$/ ],
+	[ qr/error: no relations to check matching "\*temporary\*"/ ],
+	'pg_amcheck temporary objects on primary');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--relation', '*temporary*', '--relation', '*unlogged*'],
+	1,
+	[ qr/^$/ ],
+	[ qr/error: no relations to check matching "\*temporary\*"/ ],
+	'pg_amcheck temporary objects on standby');
-- 
2.21.1 (Apple Git-122.3)

