From 3b6b704f2236e0bc3a08d7173a57aec8de9207a5 Mon Sep 17 00:00:00 2001
From: "Andrey M. Borodin" <x4mmm@flight.local>
Date: Sat, 23 Jul 2022 14:17:44 +0500
Subject: [PATCH v23 2/3] Add gist_index_check() function to verify GiST index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This function traverses GiST with a depth-first search and checks
that all downlink tuples are included into parent tuple keyspace.
This traversal holds a lock on only one page at a time until some
discrepancy is found. To re-check a suspicious pair of parent and
child tuples it acquires locks on both parent and child pages in
the same order as page split does.

Author: Andrey Borodin <amborodin@acm.org>
Author: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-By: José Villanova <jose.arthur@gmail.com>
Reviewed-By: Aleksander Alekseev <aleksander@timescale.com>
Reviewed-By: Nikolay Samokhvalov <samokhvalov@gmail.com>
Reviewed-By: Andres Freund <andres@anarazel.de>
Discussion: https://postgr.es/m/45AC9B0A-2B45-40EE-B08F-BDCF5739D1E1%40yandex-team.ru
---
 contrib/amcheck/Makefile                |   6 +-
 contrib/amcheck/amcheck--1.3--1.4.sql   |  14 +
 contrib/amcheck/amcheck.control         |   2 +-
 contrib/amcheck/expected/check_gist.out | 119 +++++
 contrib/amcheck/meson.build             |   3 +
 contrib/amcheck/sql/check_gist.sql      |  42 ++
 contrib/amcheck/verify_gist.c           | 581 ++++++++++++++++++++++++
 doc/src/sgml/amcheck.sgml               |  19 +
 8 files changed, 783 insertions(+), 3 deletions(-)
 create mode 100644 contrib/amcheck/amcheck--1.3--1.4.sql
 create mode 100644 contrib/amcheck/expected/check_gist.out
 create mode 100644 contrib/amcheck/sql/check_gist.sql
 create mode 100644 contrib/amcheck/verify_gist.c

diff --git a/contrib/amcheck/Makefile b/contrib/amcheck/Makefile
index 6d26551fe3..e9e0198276 100644
--- a/contrib/amcheck/Makefile
+++ b/contrib/amcheck/Makefile
@@ -4,14 +4,16 @@ MODULE_big	= amcheck
 OBJS = \
 	$(WIN32RES) \
 	amcheck.o \
+	verify_gist.o \
 	verify_heapam.o \
 	verify_nbtree.o
 
 EXTENSION = amcheck
-DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql
+DATA = amcheck--1.2--1.3.sql amcheck--1.1--1.2.sql amcheck--1.0--1.1.sql amcheck--1.0.sql \
+		amcheck--1.3--1.4.sql
 PGFILEDESC = "amcheck - function for verifying relation integrity"
 
-REGRESS = check check_btree check_heap
+REGRESS = check check_btree check_gist check_heap
 
 TAP_TESTS = 1
 
diff --git a/contrib/amcheck/amcheck--1.3--1.4.sql b/contrib/amcheck/amcheck--1.3--1.4.sql
new file mode 100644
index 0000000000..5d30784b44
--- /dev/null
+++ b/contrib/amcheck/amcheck--1.3--1.4.sql
@@ -0,0 +1,14 @@
+/* contrib/amcheck/amcheck--1.3--1.4.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "ALTER EXTENSION amcheck UPDATE TO '1.4'" to load this file. \quit
+
+
+-- gist_index_check()
+--
+CREATE FUNCTION gist_index_check(index regclass, heapallindexed boolean)
+RETURNS VOID
+AS 'MODULE_PATHNAME', 'gist_index_check'
+LANGUAGE C STRICT;
+
+REVOKE ALL ON FUNCTION gist_index_check(regclass, boolean) FROM PUBLIC;
diff --git a/contrib/amcheck/amcheck.control b/contrib/amcheck/amcheck.control
index ab50931f75..e67ace01c9 100644
--- a/contrib/amcheck/amcheck.control
+++ b/contrib/amcheck/amcheck.control
@@ -1,5 +1,5 @@
 # amcheck extension
 comment = 'functions for verifying relation integrity'
-default_version = '1.3'
+default_version = '1.4'
 module_pathname = '$libdir/amcheck'
 relocatable = true
diff --git a/contrib/amcheck/expected/check_gist.out b/contrib/amcheck/expected/check_gist.out
new file mode 100644
index 0000000000..4f3baa3776
--- /dev/null
+++ b/contrib/amcheck/expected/check_gist.out
@@ -0,0 +1,119 @@
+SELECT setseed(1);
+ setseed 
+---------
+ 
+(1 row)
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', false);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx1', true);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+SELECT gist_index_check('gist_check_idx2', true);
+ gist_index_check 
+------------------
+ 
+(1 row)
+
+-- cleanup
+DROP TABLE gist_check;
diff --git a/contrib/amcheck/meson.build b/contrib/amcheck/meson.build
index cd81cbf3bc..9e7ebc0499 100644
--- a/contrib/amcheck/meson.build
+++ b/contrib/amcheck/meson.build
@@ -2,6 +2,7 @@
 
 amcheck_sources = files(
   'amcheck.c',
+  'verify_gist.c',
   'verify_heapam.c',
   'verify_nbtree.c',
 )
@@ -24,6 +25,7 @@ install_data(
   'amcheck--1.0--1.1.sql',
   'amcheck--1.1--1.2.sql',
   'amcheck--1.2--1.3.sql',
+  'amcheck--1.3--1.4.sql',
   kwargs: contrib_data_args,
 )
 
@@ -35,6 +37,7 @@ tests += {
     'sql': [
       'check',
       'check_btree',
+      'check_gist',
       'check_heap',
     ],
   },
diff --git a/contrib/amcheck/sql/check_gist.sql b/contrib/amcheck/sql/check_gist.sql
new file mode 100644
index 0000000000..0e3a8cf3bb
--- /dev/null
+++ b/contrib/amcheck/sql/check_gist.sql
@@ -0,0 +1,42 @@
+
+SELECT setseed(1);
+
+-- Test that index built with bulk load is correct
+CREATE TABLE gist_check AS SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+CREATE INDEX gist_check_idx1 ON gist_check USING gist(c);
+CREATE INDEX gist_check_idx2 ON gist_check USING gist(c) INCLUDE(p);
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after inserts
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+-- Test that index is correct after vacuuming
+DELETE FROM gist_check WHERE c[1] < 5000; -- delete clustered data
+DELETE FROM gist_check WHERE c[1]::int % 2 = 0; -- delete scattered data
+
+-- We need two passes through the index and one global vacuum to actually
+-- reuse page
+VACUUM gist_check;
+VACUUM;
+
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+
+
+-- Test that index is correct after reusing pages
+INSERT INTO gist_check SELECT point(random(),s) c, random() p FROM generate_series(1,10000) s;
+SELECT gist_index_check('gist_check_idx1', false);
+SELECT gist_index_check('gist_check_idx2', false);
+SELECT gist_index_check('gist_check_idx1', true);
+SELECT gist_index_check('gist_check_idx2', true);
+-- cleanup
+DROP TABLE gist_check;
diff --git a/contrib/amcheck/verify_gist.c b/contrib/amcheck/verify_gist.c
new file mode 100644
index 0000000000..9776969b4c
--- /dev/null
+++ b/contrib/amcheck/verify_gist.c
@@ -0,0 +1,581 @@
+/*-------------------------------------------------------------------------
+ *
+ * verify_gist.c
+ *		Verifies the integrity of GiST indexes based on invariants.
+ *
+ * Verification checks that all paths in the GiST graph contain
+ * consistent keys: tuples on parent pages consistently include tuples
+ * from child pages. Also, verification checks graph invariants: an
+ * internal page must have at least one downlink, and an internal page
+ * can reference either only leaf pages or only internal pages.
+ *
+ *
+ * Copyright (c) 2017-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  contrib/amcheck/verify_gist.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/gist_private.h"
+#include "access/tableam.h"
+#include "amcheck.h"
+#include "catalog/index.h"
+#include "catalog/pg_am.h"
+#include "common/pg_prng.h"
+#include "lib/bloomfilter.h"
+#include "utils/memutils.h"
+
+
+/*
+ * GistScanItem represents one item of depth-first scan of GiST index.
+ */
+typedef struct GistScanItem
+{
+	int			depth;
+
+	/* Referenced block number to check next */
+	BlockNumber blkno;
+
+	/*
+	 * Correctness of this parent tuple will be checked against the
+	 * contents of the referenced page.  NULL for the root block.
+	 */
+	IndexTuple	parenttup;
+
+	/*
+	 * LSN to handle a concurrent scan of the page.  It's necessary to
+	 * avoid missing some subtrees of a page that was split just before
+	 * we read it.
+	 */
+	XLogRecPtr	parentlsn;
+
+	/*
+	 * Reference to the parent page, for re-locking in case a parent-child
+	 * tuple discrepancy is found.
+	 */
+	BlockNumber parentblk;
+
+	/* Pointer to a next stack item. */
+	struct GistScanItem *next;
+} GistScanItem;
+
+typedef struct GistCheckState
+{
+	/* Bloom filter fingerprints index tuples */
+	bloom_filter *filter;
+	/* Debug counter */
+	int64		heaptuplespresent;
+	/* GiST state */
+	GISTSTATE  *state;
+
+	Snapshot	snapshot;
+	Relation	rel;
+	Relation	heaprel;
+
+	/* progress reporting stuff */
+	BlockNumber totalblocks;
+	BlockNumber reportedblocks;
+	BlockNumber scannedblocks;
+	BlockNumber deltablocks;
+} GistCheckState;
+
+PG_FUNCTION_INFO_V1(gist_index_check);
+
+static void gist_init_heapallindexed(Relation rel, GistCheckState * result);
+static void gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+											   void *callback_state);
+static void check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo);
+static IndexTuple gist_refind_parent(Relation rel, BlockNumber parentblkno,
+									 BlockNumber childblkno,
+									 BufferAccessStrategy strategy);
+static ItemId PageGetItemIdCareful(Relation rel, BlockNumber block,
+								   Page page, OffsetNumber offset);
+static void gist_tuple_present_callback(Relation index, ItemPointer tid,
+										Datum *values, bool *isnull,
+										bool tupleIsAlive, void *checkstate);
+
+/*
+ * gist_index_check(index regclass)
+ *
+ * Verify integrity of GiST index.
+ *
+ * Acquires AccessShareLock on heap & index relations.
+ */
+Datum
+gist_index_check(PG_FUNCTION_ARGS)
+{
+	Oid			indrelid = PG_GETARG_OID(0);
+	bool		heapallindexed = PG_GETARG_BOOL(1);
+
+	amcheck_lock_relation_and_check(indrelid,
+									GIST_AM_OID,
+									gist_check_parent_keys_consistency,
+									AccessShareLock,
+									&heapallindexed);
+
+	PG_RETURN_VOID();
+}
+
+static void
+gist_init_heapallindexed(Relation rel, GistCheckState * result)
+{
+	int64		total_pages;
+	int64		total_elems;
+	uint64		seed;
+
+	/*
+	 * Size Bloom filter based on estimated number of tuples in index. This
+	 * logic is similar to B-tree, see verify_btree.c .
+	 */
+	total_pages = result->totalblocks;
+	total_elems = Max(total_pages * (MaxOffsetNumber / 5),
+					  (int64) rel->rd_rel->reltuples);
+	seed = pg_prng_uint64(&pg_global_prng_state);
+	result->filter = bloom_create(total_elems, maintenance_work_mem, seed);
+
+	result->snapshot = RegisterSnapshot(GetTransactionSnapshot());
+
+
+	/*
+	 * GetTransactionSnapshot() always acquires a new MVCC snapshot in READ
+	 * COMMITTED mode.  A new snapshot is guaranteed to have all the entries
+	 * it requires in the index.
+	 *
+	 * We must defend against the possibility that an old xact snapshot was
+	 * returned at higher isolation levels when that snapshot is not safe for
+	 * index scans of the target index.  This is possible when the snapshot
+	 * sees tuples that are before the index's indcheckxmin horizon.  Throwing
+	 * an error here should be very rare.  It doesn't seem worth using a
+	 * secondary snapshot to avoid this.
+	 */
+	if (IsolationUsesXactSnapshot() && rel->rd_index->indcheckxmin &&
+		!TransactionIdPrecedes(HeapTupleHeaderGetXmin(rel->rd_indextuple->t_data),
+							   result->snapshot->xmin))
+		ereport(ERROR,
+				(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+				 errmsg("index \"%s\" cannot be verified using transaction snapshot",
+						RelationGetRelationName(rel))));
+}
+
+/*
+ * Main entry point for the GiST check. Allocates a memory context and scans
+ * through the GiST graph. The scan is a depth-first search performed with a
+ * stack of GistScanItems. Initially this stack contains only the root block
+ * number. On each iteration the top block number is replaced by the block
+ * numbers it references.
+ *
+ * This function verifies that tuples of internal pages cover all the key
+ * space of each tuple on a leaf page.  To do this, for every downlink we
+ * check with gistgetadjusted() that the parent tuple does not require any
+ * adjustment with respect to each tuple on the referenced child page.
+ *
+ * NOTE(review): this comment previously referenced gist_check_internal_page(),
+ * which does not exist in this file.
+ */
+static void
+gist_check_parent_keys_consistency(Relation rel, Relation heaprel,
+								   void *callback_state)
+{
+	BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
+	GistScanItem *stack;
+	MemoryContext mctx;
+	MemoryContext oldcontext;
+	GISTSTATE  *state;
+	int			leafdepth;
+	bool		heapallindexed = *((bool *) callback_state);
+	GistCheckState check_state;
+
+	mctx = AllocSetContextCreate(CurrentMemoryContext,
+								 "amcheck context",
+								 ALLOCSET_DEFAULT_SIZES);
+	oldcontext = MemoryContextSwitchTo(mctx);
+
+	state = initGISTstate(rel);
+
+	check_state.state = state;
+	check_state.rel = rel;
+	check_state.heaprel = heaprel;
+
+	check_state.totalblocks = RelationGetNumberOfBlocks(rel);
+	check_state.reportedblocks = 0;
+	check_state.scannedblocks = 0;
+	/* report every 100 blocks or 5%, whichever is bigger */
+	check_state.deltablocks = Max(check_state.totalblocks / 20, 100);
+
+	if (heapallindexed)
+		gist_init_heapallindexed(rel, &check_state);
+
+	/*
+	 * We don't know the height of the tree yet, but as soon as we encounter a
+	 * leaf page, we will set 'leafdepth' to its depth.
+	 */
+	leafdepth = -1;
+
+	/* Start the scan at the root page */
+	stack = (GistScanItem *) palloc0(sizeof(GistScanItem));
+	stack->depth = 0;
+	stack->parenttup = NULL;
+	stack->parentblk = InvalidBlockNumber;
+	stack->parentlsn = InvalidXLogRecPtr;
+	stack->blkno = GIST_ROOT_BLKNO;
+
+	/*
+	 * This GiST scan is effectively "old" VACUUM version before commit
+	 * fe280694d which introduced physical order scanning.
+	 */
+
+	while (stack)
+	{
+		GistScanItem *stack_next;
+		Buffer		buffer;
+		Page		page;
+		OffsetNumber i,
+					maxoff;
+		XLogRecPtr	lsn;
+
+		CHECK_FOR_INTERRUPTS();
+
+		/* Report progress */
+		if (check_state.scannedblocks > check_state.reportedblocks +
+			check_state.deltablocks)
+		{
+			elog(DEBUG1, "verified level %u blocks of approximately %u total",
+				 check_state.scannedblocks, check_state.totalblocks);
+			check_state.reportedblocks = check_state.scannedblocks;
+		}
+		check_state.scannedblocks++;
+
+		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, stack->blkno,
+									RBM_NORMAL, strategy);
+		LockBuffer(buffer, GIST_SHARE);
+		page = (Page) BufferGetPage(buffer);
+		lsn = BufferGetLSNAtomic(buffer);
+
+		/* Do basic sanity checks on the page headers */
+		check_index_page(rel, buffer, stack->blkno);
+
+		/*
+		 * It's possible that the page was split since we looked at the
+		 * parent, so that we missed the downlink of the right sibling
+		 * when we scanned the parent.  If so, add the right sibling to the
+		 * stack now.
+		 */
+		if (GistFollowRight(page) || stack->parentlsn < GistPageGetNSN(page))
+		{
+			/* split page detected, install right link to the stack */
+			GistScanItem *ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+
+			ptr->depth = stack->depth;
+			ptr->parenttup = CopyIndexTuple(stack->parenttup);
+			ptr->parentblk = stack->parentblk;
+			ptr->parentlsn = stack->parentlsn;
+			ptr->blkno = GistPageGetOpaque(page)->rightlink;
+			ptr->next = stack->next;
+			stack->next = ptr;
+		}
+
+		/* Check that the tree has the same height in all branches */
+		if (GistPageIsLeaf(page))
+		{
+			if (leafdepth == -1)
+				leafdepth = stack->depth;
+			else if (stack->depth != leafdepth)
+				ereport(ERROR,
+						(errcode(ERRCODE_INDEX_CORRUPTED),
+						 errmsg("index \"%s\": internal pages traversal encountered leaf page unexpectedly on block %u",
+								RelationGetRelationName(rel), stack->blkno)));
+		}
+
+		/*
+		 * Check that each tuple looks valid, and is consistent with the
+		 * downlink we followed when we stepped on this page.
+		 */
+		maxoff = PageGetMaxOffsetNumber(page);
+		for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
+		{
+			ItemId		iid = PageGetItemIdCareful(rel, stack->blkno, page, i);
+			IndexTuple	idxtuple = (IndexTuple) PageGetItem(page, iid);
+
+			/*
+			 * Check that it's not a leftover invalid tuple from pre-9.1.
+			 * See also gistdoinsert() and gistbulkdelete() handling of such
+			 * tuples. We do consider it an error here.
+			 */
+			if (GistTupleIsInvalid(idxtuple))
+				ereport(ERROR,
+						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+						 errmsg("index \"%s\" contains an inner tuple marked as invalid, block %u, offset %u",
+								RelationGetRelationName(rel), stack->blkno, i),
+						 errdetail("This is caused by an incomplete page split at crash recovery before upgrading to PostgreSQL 9.1."),
+						 errhint("Please REINDEX it.")));
+
+			if (MAXALIGN(ItemIdGetLength(iid)) != MAXALIGN(IndexTupleSize(idxtuple)))
+				ereport(ERROR,
+						(errcode(ERRCODE_INDEX_CORRUPTED),
+						 errmsg("index \"%s\" has inconsistent tuple sizes, block %u, offset %u",
+								RelationGetRelationName(rel), stack->blkno, i)));
+
+			/*
+			 * Check if this tuple is consistent with the downlink in the
+			 * parent.
+			 */
+			if (stack->parenttup &&
+				gistgetadjusted(rel, stack->parenttup, idxtuple, state))
+			{
+				/*
+				 * There was a discrepancy between parent and child tuples.
+				 * We need to verify that it is not a result of a concurrent
+				 * call of gistplacetopage(). So, lock the parent and try to
+				 * find the downlink for the current page. It may be missing
+				 * due to a concurrent page split; this is OK.
+				 *
+				 * Note that when we acquire the parent tuple now we hold
+				 * locks on both parent and child buffers. Thus the parent
+				 * tuple must include the keyspace of the child.
+				 */
+				pfree(stack->parenttup);
+				stack->parenttup = gist_refind_parent(rel, stack->parentblk,
+													  stack->blkno, strategy);
+
+				/* Re-check the refound tuple, or report a concurrent split */
+				if (!stack->parenttup)
+					elog(NOTICE, "Unable to find parent tuple for block %u on block %u due to concurrent split",
+						 stack->blkno, stack->parentblk);
+				else if (gistgetadjusted(rel, stack->parenttup, idxtuple, state))
+					ereport(ERROR,
+							(errcode(ERRCODE_INDEX_CORRUPTED),
+							 errmsg("index \"%s\" has inconsistent records on page %u offset %u",
+									RelationGetRelationName(rel), stack->blkno, i)));
+				else
+				{
+					/*
+					 * But now it is properly adjusted - nothing to do here.
+					 */
+				}
+			}
+
+			if (GistPageIsLeaf(page))
+			{
+				if (heapallindexed)
+					bloom_add_element(check_state.filter,
+									  (unsigned char *) idxtuple,
+									  IndexTupleSize(idxtuple));
+			}
+			else
+			{
+				/* Internal page, so recurse to the child */
+				GistScanItem *ptr;
+
+				ptr = (GistScanItem *) palloc(sizeof(GistScanItem));
+				ptr->depth = stack->depth + 1;
+				ptr->parenttup = CopyIndexTuple(idxtuple);
+				ptr->parentblk = stack->blkno;
+				ptr->blkno = ItemPointerGetBlockNumber(&(idxtuple->t_tid));
+				ptr->parentlsn = lsn;
+				ptr->next = stack->next;
+				stack->next = ptr;
+			}
+		}
+
+		LockBuffer(buffer, GIST_UNLOCK);
+		ReleaseBuffer(buffer);
+
+		/* Step to next item in the queue */
+		stack_next = stack->next;
+		if (stack->parenttup)
+			pfree(stack->parenttup);
+		pfree(stack);
+		stack = stack_next;
+	}
+
+	if (heapallindexed)
+	{
+		IndexInfo  *indexinfo = BuildIndexInfo(rel);
+		TableScanDesc scan;
+
+		scan = table_beginscan_strat(heaprel,	/* relation */
+									 check_state.snapshot,	/* snapshot */
+									 0, /* number of keys */
+									 NULL,	/* scan key */
+									 true,	/* buffer access strategy OK */
+									 true); /* syncscan OK? */
+
+		/*
+		 * Scan will behave as the first scan of a CREATE INDEX CONCURRENTLY.
+		 */
+		indexinfo->ii_Concurrent = true;
+
+		indexinfo->ii_Unique = false;
+		indexinfo->ii_ExclusionOps = NULL;
+		indexinfo->ii_ExclusionProcs = NULL;
+		indexinfo->ii_ExclusionStrats = NULL;
+
+		elog(DEBUG1, "verifying that tuples from index \"%s\" are present in \"%s\"",
+			 RelationGetRelationName(rel),
+			 RelationGetRelationName(heaprel));
+
+		table_index_build_scan(heaprel, rel, indexinfo, true, false,
+							   gist_tuple_present_callback, (void *) &check_state, scan);
+
+		ereport(DEBUG1,
+				(errmsg_internal("finished verifying presence of " INT64_FORMAT " tuples from table \"%s\" with bitset %.2f%% set",
+								 check_state.heaptuplespresent,
+								 RelationGetRelationName(heaprel),
+								 100.0 * bloom_prop_bits_set(check_state.filter))));
+
+		UnregisterSnapshot(check_state.snapshot);
+		bloom_free(check_state.filter);
+	}
+
+	MemoryContextSwitchTo(oldcontext);
+	MemoryContextDelete(mctx);
+}
+
+static void
+gist_tuple_present_callback(Relation index, ItemPointer tid, Datum *values,
+							bool *isnull, bool tupleIsAlive, void *checkstate)
+{
+	GistCheckState *state = (GistCheckState *) checkstate;
+	IndexTuple	itup = gistFormTuple(state->state, index, values, isnull, true);
+
+	itup->t_tid = *tid;
+	/* Probe Bloom filter -- tuple should be present */
+	if (bloom_lacks_element(state->filter, (unsigned char *) itup,
+							IndexTupleSize(itup)))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg("heap tuple (%u,%u) from table \"%s\" lacks matching index tuple within index \"%s\"",
+						ItemPointerGetBlockNumber(&(itup->t_tid)),
+						ItemPointerGetOffsetNumber(&(itup->t_tid)),
+						RelationGetRelationName(state->heaprel),
+						RelationGetRelationName(state->rel))));
+
+	state->heaptuplespresent++;
+
+	pfree(itup);
+}
+
+static void
+check_index_page(Relation rel, Buffer buffer, BlockNumber blockNo)
+{
+	Page		page = BufferGetPage(buffer);
+
+	gistcheckpage(rel, buffer);
+
+	if (GistPageGetOpaque(page)->gist_page_id != GIST_PAGE_ID)
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" has corrupted page %d",
+						RelationGetRelationName(rel), blockNo)));
+
+	if (GistPageIsDeleted(page))
+	{
+		if (!GistPageIsLeaf(page))
+			ereport(ERROR,
+					(errcode(ERRCODE_INDEX_CORRUPTED),
+					 errmsg("index \"%s\" has deleted internal page %d",
+							RelationGetRelationName(rel), blockNo)));
+		if (PageGetMaxOffsetNumber(page) > InvalidOffsetNumber)
+			ereport(ERROR,
+					(errcode(ERRCODE_INDEX_CORRUPTED),
+					 errmsg("index \"%s\" has deleted page %d with tuples",
+							RelationGetRelationName(rel), blockNo)));
+	}
+	else if (PageGetMaxOffsetNumber(page) > MaxIndexTuplesPerPage)
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("index \"%s\" has page %d with exceeding count of tuples",
+						RelationGetRelationName(rel), blockNo)));
+}
+
+/*
+ * Try to re-find downlink pointing to 'blkno', in 'parentblkno'.
+ *
+ * If found, returns a palloc'd copy of the downlink tuple. Otherwise,
+ * returns NULL.
+ */
+static IndexTuple
+gist_refind_parent(Relation rel,
+				   BlockNumber parentblkno, BlockNumber childblkno,
+				   BufferAccessStrategy strategy)
+{
+	Buffer		parentbuf;
+	Page		parentpage;
+	OffsetNumber o,
+				parent_maxoff;
+	IndexTuple	result = NULL;
+
+	parentbuf = ReadBufferExtended(rel, MAIN_FORKNUM, parentblkno, RBM_NORMAL,
+								   strategy);
+
+	LockBuffer(parentbuf, GIST_SHARE);
+	parentpage = BufferGetPage(parentbuf);
+
+	if (GistPageIsLeaf(parentpage))
+	{
+		/* That's somewhat suspicious - parent page converted to leaf? */
+		/* Anyway, it's definitely not a page we were looking for */
+		UnlockReleaseBuffer(parentbuf);
+		return result;
+	}
+
+	parent_maxoff = PageGetMaxOffsetNumber(parentpage);
+	for (o = FirstOffsetNumber; o <= parent_maxoff; o = OffsetNumberNext(o))
+	{
+		ItemId		p_iid = PageGetItemIdCareful(rel, parentblkno, parentpage, o);
+		IndexTuple	itup = (IndexTuple) PageGetItem(parentpage, p_iid);
+
+		if (ItemPointerGetBlockNumber(&(itup->t_tid)) == childblkno)
+		{
+			/*
+			 * Found it! Make a copy and return it while both parent and
+			 * child pages are locked. This guarantees that at this
+			 * particular moment the tuples must be coherent to each other.
+			 */
+			result = CopyIndexTuple(itup);
+			break;
+		}
+	}
+
+	UnlockReleaseBuffer(parentbuf);
+
+	return result;
+}
+
+static ItemId
+PageGetItemIdCareful(Relation rel, BlockNumber block, Page page,
+					 OffsetNumber offset)
+{
+	ItemId		itemid = PageGetItemId(page, offset);
+
+	if (ItemIdGetOffset(itemid) + ItemIdGetLength(itemid) >
+		BLCKSZ - MAXALIGN(sizeof(GISTPageOpaqueData)))
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("line pointer points past end of tuple space in index \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+									block, offset, ItemIdGetOffset(itemid),
+									ItemIdGetLength(itemid),
+									ItemIdGetFlags(itemid))));
+
+	/*
+	 * Verify that the line pointer isn't LP_REDIRECT or LP_UNUSED, since
+	 * nbtree and GiST never use either.  Verify that the line pointer has
+	 * storage, too, since even LP_DEAD items should.
+	 */
+	if (ItemIdIsRedirected(itemid) || !ItemIdIsUsed(itemid) ||
+		ItemIdGetLength(itemid) == 0)
+		ereport(ERROR,
+				(errcode(ERRCODE_INDEX_CORRUPTED),
+				 errmsg("invalid line pointer storage in index \"%s\"",
+						RelationGetRelationName(rel)),
+				 errdetail_internal("Index tid=(%u,%u) lp_off=%u, lp_len=%u lp_flags=%u.",
+									block, offset, ItemIdGetOffset(itemid),
+									ItemIdGetLength(itemid),
+									ItemIdGetFlags(itemid))));
+
+	return itemid;
+}
diff --git a/doc/src/sgml/amcheck.sgml b/doc/src/sgml/amcheck.sgml
index 2b9c1a9205..40de7c33f5 100644
--- a/doc/src/sgml/amcheck.sgml
+++ b/doc/src/sgml/amcheck.sgml
@@ -179,6 +179,25 @@ ORDER BY c.relpages DESC LIMIT 10;
      </para>
     </listitem>
    </varlistentry>
+
+   <varlistentry>
+    <term>
+     <function>gist_index_check(index regclass, heapallindexed boolean) returns void</function>
+     <indexterm>
+      <primary>gist_index_check</primary>
+     </indexterm>
+    </term>
+
+    <listitem>
+     <para>
+      <function>gist_index_check</function> tests that its target GiST
+      index has consistent parent-child tuple relations (no parent tuples
+      require tuple adjustment) and that the page graph respects
+      balanced-tree invariants (internal pages reference only leaf pages
+      or only internal pages).
+     </para>
+    </listitem>
+   </varlistentry>
   </variablelist>
   <tip>
    <para>
-- 
2.32.0 (Apple Git-132)

