From 054875a59b14eb49e7cf6d82f9f95da5240d4c7d Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Mon, 11 Oct 2021 16:20:34 -0700
Subject: [PATCH v4] pg_amcheck: avoid unhelpful verification attempts.

Avoid calling contrib/amcheck functions with relations that are
unsuitable for checking.  Specifically, don't attempt verification of
temporary relations, or indexes whose pg_index entry indicates that the
index is invalid, or not ready.

These relations are not supported by any of the contrib/amcheck
functions, for reasons that are pretty fundamental.  For example, the
implementation of REINDEX CONCURRENTLY can add its own "transient"
pg_index entries, which has rather unclear implications for the B-Tree
verification functions, at least in the general case -- so they just
treat it as an error.  It falls to the amcheck caller (in this case
pg_amcheck) to deal with the situation at a higher level.

pg_amcheck now simply treats these conditions as additional "visibility
concerns" when it queries system catalogs.  This is a little arbitrary.
It seems to have the least problems among any of the available
alternatives.

Author: Mark Dilger <mark.dilger@enterprisedb.com>
Reported-By: Alexander Lakhin <exclusion@gmail.com>
Reviewed-By: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Robert Haas <robertmhaas@gmail.com>
Bug: #17212
Discussion: https://postgr.es/m/17212-34dd4a1d6bba98bf@postgresql.org
Backpatch: 14-, where pg_amcheck was introduced.
---
 src/bin/pg_amcheck/pg_amcheck.c         |  83 ++++++---
 src/bin/pg_amcheck/t/006_bad_targets.pl | 233 ++++++++++++++++++++++++
 doc/src/sgml/ref/pg_amcheck.sgml        |   7 +
 3 files changed, 294 insertions(+), 29 deletions(-)
 create mode 100644 src/bin/pg_amcheck/t/006_bad_targets.pl

diff --git a/src/bin/pg_amcheck/pg_amcheck.c b/src/bin/pg_amcheck/pg_amcheck.c
index ec04b977d..d4a53c8e6 100644
--- a/src/bin/pg_amcheck/pg_amcheck.c
+++ b/src/bin/pg_amcheck/pg_amcheck.c
@@ -816,6 +816,9 @@ main(int argc, char *argv[])
  * names matching the expectations of verify_heap_slot_handler, which will
  * receive and handle each row returned from the verify_heapam() function.
  *
+ * The constructed SQL command will silently skip temporary tables, as checking
+ * them would needlessly draw errors from the underlying amcheck function.
+ *
  * sql: buffer into which the heap table checking command will be written
  * rel: relation information for the heap table to be checked
  * conn: the connection to be used, for string escaping purposes
@@ -825,10 +828,10 @@ prepare_heap_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
 {
 	resetPQExpBuffer(sql);
 	appendPQExpBuffer(sql,
-					  "SELECT blkno, offnum, attnum, msg FROM %s.verify_heapam("
-					  "\nrelation := %u, on_error_stop := %s, check_toast := %s, skip := '%s'",
+					  "SELECT v.blkno, v.offnum, v.attnum, v.msg "
+					  "FROM pg_catalog.pg_class c, %s.verify_heapam("
+					  "\nrelation := c.oid, on_error_stop := %s, check_toast := %s, skip := '%s'",
 					  rel->datinfo->amcheck_schema,
-					  rel->reloid,
 					  opts.on_error_stop ? "true" : "false",
 					  opts.reconcile_toast ? "true" : "false",
 					  opts.skip);
@@ -838,7 +841,10 @@ prepare_heap_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
 	if (opts.endblock >= 0)
 		appendPQExpBuffer(sql, ", endblock := " INT64_FORMAT, opts.endblock);
 
-	appendPQExpBufferChar(sql, ')');
+	appendPQExpBuffer(sql,
+					  "\n) v WHERE c.oid = %u "
+					  "AND c.relpersistence != 't'",
+					  rel->reloid);
 }
 
 /*
@@ -849,6 +855,10 @@ prepare_heap_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
  * functions do not return any, but rather return corruption information by
  * raising errors, which verify_btree_slot_handler expects.
  *
+ * The constructed SQL command will silently skip temporary indexes, and
+ * indexes being reindexed concurrently, as checking them would needlessly draw
+ * errors from the underlying amcheck functions.
+ *
  * sql: buffer into which the heap table checking command will be written
  * rel: relation information for the index to be checked
  * conn: the connection to be used, for string escaping purposes
@@ -858,27 +868,31 @@ prepare_btree_command(PQExpBuffer sql, RelationInfo *rel, PGconn *conn)
 {
 	resetPQExpBuffer(sql);
 
-	/*
-	 * Embed the database, schema, and relation name in the query, so if the
-	 * check throws an error, the user knows which relation the error came
-	 * from.
-	 */
 	if (opts.parent_check)
 		appendPQExpBuffer(sql,
-						  "SELECT * FROM %s.bt_index_parent_check("
-						  "index := '%u'::regclass, heapallindexed := %s, "
-						  "rootdescend := %s)",
+						  "SELECT %s.bt_index_parent_check("
+						  "index := c.oid, heapallindexed := %s, rootdescend := %s)"
+						  "\nFROM pg_catalog.pg_class c, pg_catalog.pg_index i "
+						  "WHERE c.oid = %u "
+						  "AND c.oid = i.indexrelid "
+						  "AND c.relpersistence != 't' "
+						  "AND i.indisready AND i.indisvalid AND i.indislive",
 						  rel->datinfo->amcheck_schema,
-						  rel->reloid,
 						  (opts.heapallindexed ? "true" : "false"),
-						  (opts.rootdescend ? "true" : "false"));
+						  (opts.rootdescend ? "true" : "false"),
+						  rel->reloid);
 	else
 		appendPQExpBuffer(sql,
-						  "SELECT * FROM %s.bt_index_check("
-						  "index := '%u'::regclass, heapallindexed := %s)",
+						  "SELECT %s.bt_index_check("
+						  "index := c.oid, heapallindexed := %s)"
+						  "\nFROM pg_catalog.pg_class c, pg_catalog.pg_index i "
+						  "WHERE c.oid = %u "
+						  "AND c.oid = i.indexrelid "
+						  "AND c.relpersistence != 't' "
+						  "AND i.indisready AND i.indisvalid AND i.indislive",
 						  rel->datinfo->amcheck_schema,
-						  rel->reloid,
-						  (opts.heapallindexed ? "true" : "false"));
+						  (opts.heapallindexed ? "true" : "false"),
+						  rel->reloid);
 }
 
 /*
@@ -1086,15 +1100,17 @@ verify_btree_slot_handler(PGresult *res, PGconn *conn, void *context)
 
 	if (PQresultStatus(res) == PGRES_TUPLES_OK)
 	{
-		int			ntups = PQntuples(res);
+		int                     ntups = PQntuples(res);
 
-		if (ntups != 1)
+		if (ntups > 1)
 		{
 			/*
 			 * We expect the btree checking functions to return one void row
-			 * each, so we should output some sort of warning if we get
-			 * anything else, not because it indicates corruption, but because
-			 * it suggests a mismatch between amcheck and pg_amcheck versions.
+			 * each, or zero rows if the check was skipped due to the object
+			 * being in the wrong state to be checked, so we should output some
+			 * sort of warning if we get anything more, not because it
+			 * indicates corruption, but because it suggests a mismatch between
+			 * amcheck and pg_amcheck versions.
 			 *
 			 * In conjunction with --progress, anything written to stderr at
 			 * this time would present strangely to the user without an extra
@@ -1889,10 +1905,16 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
 						  "\nAND (c.relam = %u OR NOT ep.btree_only OR ep.rel_regex IS NULL)",
 						  HEAP_TABLE_AM_OID, BTREE_AM_OID);
 
+	/*
+	 * Exclude temporary tables and indexes, which must necessarily belong to
+	 * other sessions.  (We don't create any ourselves.)  We must ultimately
+	 * exclude indexes marked invalid or not ready, but we delay that decision
+	 * until firing off the amcheck command, as the state of an index may
+	 * change by then.
+	 */
+	appendPQExpBufferStr(&sql, "\nWHERE c.relpersistence != 't'");
 	if (opts.excludetbl || opts.excludeidx || opts.excludensp)
-		appendPQExpBufferStr(&sql, "\nWHERE ep.pattern_id IS NULL");
-	else
-		appendPQExpBufferStr(&sql, "\nWHERE true");
+		appendPQExpBufferStr(&sql, "\nAND ep.pattern_id IS NULL");
 
 	/*
 	 * We need to be careful not to break the --no-dependent-toast and
@@ -1944,7 +1966,8 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
 								 "\nON ('pg_toast' ~ ep.nsp_regex OR ep.nsp_regex IS NULL)"
 								 "\nAND (t.relname ~ ep.rel_regex OR ep.rel_regex IS NULL)"
 								 "\nAND ep.heap_only"
-								 "\nWHERE ep.pattern_id IS NULL");
+								 "\nWHERE ep.pattern_id IS NULL"
+								 "\nAND t.relpersistence != 't'");
 		appendPQExpBufferStr(&sql,
 							 "\n)");
 	}
@@ -1962,7 +1985,8 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
 						  "\nINNER JOIN pg_catalog.pg_index i "
 						  "ON r.oid = i.indrelid "
 						  "INNER JOIN pg_catalog.pg_class c "
-						  "ON i.indexrelid = c.oid");
+						  "ON i.indexrelid = c.oid "
+						  "AND c.relpersistence != 't'");
 		if (opts.excludeidx || opts.excludensp)
 			appendPQExpBufferStr(&sql,
 								 "\nINNER JOIN pg_catalog.pg_namespace n "
@@ -2000,7 +2024,8 @@ compile_relation_list_one_db(PGconn *conn, SimplePtrList *relations,
 						  "INNER JOIN pg_catalog.pg_index i "
 						  "ON t.oid = i.indrelid"
 						  "\nINNER JOIN pg_catalog.pg_class c "
-						  "ON i.indexrelid = c.oid");
+						  "ON i.indexrelid = c.oid "
+						  "AND c.relpersistence != 't'");
 		if (opts.excludeidx)
 			appendPQExpBufferStr(&sql,
 								 "\nLEFT OUTER JOIN exclude_pat ep "
diff --git a/src/bin/pg_amcheck/t/006_bad_targets.pl b/src/bin/pg_amcheck/t/006_bad_targets.pl
new file mode 100644
index 000000000..e89d7d1be
--- /dev/null
+++ b/src/bin/pg_amcheck/t/006_bad_targets.pl
@@ -0,0 +1,233 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# This regression test checks the behavior of pg_amcheck in the presence of
+# inappropriate target relations.
+#
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 52;
+
+# Establish a primary and standby server, with temporary and unlogged tables.
+# The temporary tables should not be checked on either system, as pg_amcheck's
+# sessions won't be able to see their contents.  The unlogged tables should not
+# be checked on the standby, as they won't have relation forks there.
+#
+my $node_primary = PostgresNode->new('primary');
+$node_primary->init(
+	allows_streaming => 1,
+	auth_extra       => [ '--create-role', 'repl_role' ]);
+$node_primary->start;
+$node_primary->safe_psql('postgres', qq(
+CREATE EXTENSION amcheck;
+CREATE UNLOGGED TABLE unlogged_heap (i INTEGER[], b BOX);
+INSERT INTO unlogged_heap (i, b) VALUES (ARRAY[1,2,3]::INTEGER[], '((1,2),(3,4))'::BOX);
+CREATE INDEX unlogged_btree ON unlogged_heap USING BTREE (i);
+CREATE INDEX unlogged_brin ON unlogged_heap USING BRIN (b);
+CREATE INDEX unlogged_gin ON unlogged_heap USING GIN (i);
+CREATE INDEX unlogged_gist ON unlogged_heap USING GIST (b);
+CREATE INDEX unlogged_hash ON unlogged_heap USING HASH (i);
+));
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Hold open a session with temporary tables and indexes defined
+#
+my $in = '';
+my $out = '';
+my $timer = IPC::Run::timeout(180);
+my $h = $node_primary->background_psql('postgres', \$in, \$out, $timer);
+$in = qq(
+BEGIN;
+CREATE TEMPORARY TABLE heap_temporary (i INTEGER[], b BOX) ON COMMIT PRESERVE ROWS;
+INSERT INTO heap_temporary (i, b) VALUES (ARRAY[1,2,3]::INTEGER[], '((1,2),(3,4))'::BOX);
+CREATE INDEX btree_temporary ON heap_temporary USING BTREE (i);
+CREATE INDEX brin_temporary ON heap_temporary USING BRIN (b);
+CREATE INDEX gin_temporary ON heap_temporary USING GIN (i);
+CREATE INDEX gist_temporary ON heap_temporary USING GIST (b);
+CREATE INDEX hash_temporary ON heap_temporary USING HASH (i);
+COMMIT;
+BEGIN;
+);
+$h->pump_nb;
+
+my $node_standby = PostgresNode->new('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+	has_streaming => 1);
+$node_standby->start;
+$node_primary->wait_for_catchup($node_standby, 'replay',
+	$node_primary->lsn('replay'));
+
+# Check that running amcheck functions from SQL against inappropriate targets
+# fails sensibly.  This portion of the test arguably belongs in contrib/amcheck
+# rather than here, but we have already set up the necessary test environment,
+# so check here rather than duplicating the environment there.
+#
+my ($stdout, $stderr);
+for my $test (
+	[ $node_standby => "SELECT * FROM verify_heapam('unlogged_heap')",
+	  qr/^$/,
+	 "checking unlogged table during recovery does not complain" ],
+
+	[ $node_standby => "SELECT * FROM bt_index_check('unlogged_btree')",
+	  qr/^$/,
+	 "checking unlogged btree index during recovery does not complain" ],
+
+	[ $node_standby => "SELECT * FROM bt_index_check('unlogged_brin')",
+	  qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_brin" is not a B-Tree index/s,
+	 "checking unlogged brin index during recovery fails appropriately" ],
+
+	[ $node_standby => "SELECT * FROM bt_index_check('unlogged_gin')",
+	  qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_gin" is not a B-Tree index/s,
+	 "checking unlogged gin index during recovery fails appropriately" ],
+
+	[ $node_standby => "SELECT * FROM bt_index_check('unlogged_gist')",
+	  qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_gist" is not a B-Tree index/s,
+	 "checking unlogged gist index during recovery fails appropriately" ],
+
+	[ $node_standby => "SELECT * FROM bt_index_check('unlogged_hash')",
+	  qr/ERROR:  only B-Tree indexes are supported as targets for verification.*DETAIL:  Relation "unlogged_hash" is not a B-Tree index/s,
+	 "checking unlogged hash index during recovery fails appropriately" ],
+
+	)
+{
+	$test->[0]->psql('postgres', $test->[1],
+					 stdout => \$stdout, stderr => \$stderr);
+	like ($stderr, $test->[2], $test->[3]);
+}
+
+# Verify that --all excludes the temporary relations and handles unlogged
+# relations as appropriate, without raising any warnings or exiting with a
+# non-zero status.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck all objects on primary');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck all objects on standby');
+
+# Verify that explicitly asking for unlogged relations to be checked does not
+# raise any warnings or exit with a non-zero exit status, even when they cannot
+# be checked due to recovery being in progress.
+#
+# These relations will have no relation fork during recovery, so even without
+# checking them, we can say (by definition) that they are not corrupt, because
+# it is meaningless to declare a non-existent relation fork corrupt.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--relation', '*unlogged*'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck unlogged objects on primary');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--relation', '*unlogged*'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck unlogged objects on standby');
+
+# Verify that the --heapallindexed check works on both primary and standby.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--heapallindexed'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck --helpallindexed on primary');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--heapallindexed'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck --helpallindexed on standby');
+
+# Verify that the --parent-check and --rootdescend options work on the primary.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--rootdescend'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck --rootdescend on primary');
+
+$node_primary->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--parent-check'],
+	0,
+	[ qr/^$/ ],
+	[ qr/^$/ ],
+	'pg_amcheck --parent-check on primary');
+
+# Check that the failures on the standby from using --parent-check and
+# --rootdescend are the failures we expect
+#
+$node_standby->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--rootdescend'],
+	2,
+	[ qr/ERROR:  cannot acquire lock mode ShareLock on database objects while recovery is in progress/,
+	  qr/btree index "postgres\.pg_catalog\./,
+	  qr/btree index "postgres\.pg_toast\./,
+	],
+	[ qr/^$/ ],
+	'pg_amcheck --rootdescend on standby');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--all', '-D', 'template1', '--parent-check'],
+	2,
+	[ qr/ERROR:  cannot acquire lock mode ShareLock on database objects while recovery is in progress/,
+	  qr/btree index "postgres\.pg_catalog\./,
+	  qr/btree index "postgres\.pg_toast\./,
+	],
+	[ qr/^$/ ],
+	'pg_amcheck --parent-check on standby');
+
+# Bug #17212
+#
+# Verify that explicitly asking for another session's temporary relations to be
+# checked fails, but only in the sense that nothing matched the parameter.  We
+# don't complain that they are uncheckable, only that you gave a --relation
+# option and we didn't find anything checkable matching the pattern.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--relation', '*temporary*'],
+	1,
+	[ qr/^$/ ],
+	[ qr/error: no relations to check matching "\*temporary\*"/ ],
+	'pg_amcheck temporary objects on primary');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--relation', '*temporary*'],
+	1,
+	[ qr/^$/ ],
+	[ qr/error: no relations to check matching "\*temporary\*"/ ],
+	'pg_amcheck temporary objects on standby');
+
+# Verify that a relation pattern which only matches temporary relations draws
+# an error, even when other relation patterns are ok.  This differs from the
+# test above in that the set of all relations to check is not empty.
+#
+$node_primary->command_checks_all(
+	['pg_amcheck', '--relation', '*temporary*', '--relation', '*unlogged*'],
+	1,
+	[ qr/^$/ ],
+	[ qr/error: no relations to check matching "\*temporary\*"/ ],
+	'pg_amcheck temporary objects on primary');
+
+$node_standby->command_checks_all(
+	['pg_amcheck', '--relation', '*temporary*', '--relation', '*unlogged*'],
+	1,
+	[ qr/^$/ ],
+	[ qr/error: no relations to check matching "\*temporary\*"/ ],
+	'pg_amcheck temporary objects on standby');
diff --git a/doc/src/sgml/ref/pg_amcheck.sgml b/doc/src/sgml/ref/pg_amcheck.sgml
index 1fd0ecd91..54ec14ca3 100644
--- a/doc/src/sgml/ref/pg_amcheck.sgml
+++ b/doc/src/sgml/ref/pg_amcheck.sgml
@@ -435,6 +435,13 @@ PostgreSQL documentation
    </variablelist>
   </para>
 
+  <warning>
+   <para>
+    The <option>--parent-check</option> and <option>--rootdescend</option>
+    options will fail when executed against a server in Hot Standby mode.
+   </para>
+  </warning>
+
   <para>
    The following command-line options control the connection to the server:
 
-- 
2.30.2

