Hi,

On 2020-10-15 01:37:35 -0700, Andres Freund wrote:
> Attached is a *prototype* implementation of this concept, which clearly is
> lacking some comment work (and is intentionally lacking some
> re-indentation).
> 
> I described my thoughts about how to limit the horizons for temp tables in
> https://www.postgresql.org/message-id/20201014203103.72oke6hqywcyhx7s%40alap3.anarazel.de

Attached is an updated version of this patch. It adds quite a bit of
polish, removes the isTopLevel arguments added in a7212be8b9e that are
now unnecessary, and changes the initialization of the temp table
horizons to latestCompletedXid + 1 instead of just latestCompletedXid
when no xid is assigned.
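
The logic for the temp-table horizon boils down to the following
simplified, standalone sketch (plain uint32 xids, a hypothetical
temp_oldest_nonremovable() helper, no FullTransactionId / wraparound
handling -- not the actual ProcArray code, which is in the patch below):

#include <inttypes.h>
#include <stdio.h>

typedef uint32_t TransactionId;
#define InvalidTransactionId ((TransactionId) 0)

/*
 * Oldest xid whose effects still need to be kept in this session's temp
 * tables: only our own top-level xid matters.  Without an assigned xid,
 * nothing newer than latestCompletedXid can exist in them, so
 * latestCompletedXid + 1 is safe and also allows removing rows deleted
 * by latestCompletedXid itself.
 */
static TransactionId
temp_oldest_nonremovable(TransactionId my_xid, TransactionId latest_completed)
{
	if (my_xid != InvalidTransactionId)
		return my_xid;
	return latest_completed + 1;
}

int
main(void)
{
	/* no xid assigned: horizon is latestCompletedXid + 1 */
	printf("%" PRIu32 "\n", temp_oldest_nonremovable(InvalidTransactionId, 752));
	/* xid assigned: our own xid bounds what must be kept */
	printf("%" PRIu32 "\n", temp_oldest_nonremovable(760, 752));
	return 0;
}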


> Besides comments this probably mainly needs a bit more tests around temp
> table vacuuming. Should have at least an isolation test that verifies
> that temp table rows can be a) vacuumed b) pruned away in the presence
> of other sessions with xids.

I added an isolationtester test for this. It verifies that dead rows in
temp tables get vacuumed and pruned despite concurrent sessions holding
older snapshots. It does so by forcing an index-only scan and checking
the number of heap fetches reported by EXPLAIN ANALYZE. I also added a
companion test for permanent relations, ensuring that such rows do not
get removed there.
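
Roughly, the check the test automates looks like this in psql (a rough
sketch only -- the committed spec is in the patch below, horizons_demo is
just an illustration table, and the interesting case needs a second
session holding an older snapshot while the statements below run):

-- Same planner settings as the spec, leaving an index-only scan as the
-- cheapest remaining plan.
SET enable_seqscan = off;
SET enable_indexscan = off;
SET enable_bitmapscan = off;

CREATE TEMPORARY TABLE horizons_demo (data int unique);
INSERT INTO horizons_demo VALUES (1), (2);
DELETE FROM horizons_demo;

-- The first run still has to visit the heap (and gets to prune); on the
-- second run "Heap Fetches" drops to 0 for a temp table even if another
-- session holds an older snapshot, while a permanent table keeps
-- reporting 2 until that snapshot is gone.
EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
  SELECT * FROM horizons_demo ORDER BY data;
EXPLAIN (ANALYZE, COSTS OFF, TIMING OFF, SUMMARY OFF)
  SELECT * FROM horizons_demo ORDER BY data;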


Any comments? Otherwise I'll push that patch tomorrow.

Greetings,

Andres Freund
From 1ba3c5eeb250d3e755ad5c044aede1816d040ae4 Mon Sep 17 00:00:00 2001
From: Andres Freund <and...@anarazel.de>
Date: Tue, 27 Oct 2020 20:39:57 -0700
Subject: [PATCH v2] Centralize horizon determination for temp tables, fix bug
 due to skew.

Also add tests to verify that temp tables can be pruned despite other
sessions having older snapshots.

Author: Andres Freund
Discussion: https://postgr.es/m/20201014203103.72oke6hqywcyh...@alap3.anarazel.de
Discussion: https://postgr.es/m/20201015083735.derdzysdtqdvx...@alap3.anarazel.de
---
 src/include/commands/cluster.h           |   3 +-
 src/include/commands/vacuum.h            |   1 -
 src/backend/access/heap/vacuumlazy.c     |   1 -
 src/backend/commands/cluster.c           |  28 +--
 src/backend/commands/vacuum.c            |  76 +++---
 src/backend/storage/ipc/procarray.c      |  59 ++++-
 src/test/isolation/expected/horizons.out | 281 +++++++++++++++++++++++
 src/test/isolation/isolation_schedule    |   1 +
 src/test/isolation/specs/horizons.spec   | 165 +++++++++++++
 9 files changed, 541 insertions(+), 74 deletions(-)
 create mode 100644 src/test/isolation/expected/horizons.out
 create mode 100644 src/test/isolation/specs/horizons.spec

diff --git a/src/include/commands/cluster.h b/src/include/commands/cluster.h
index 1eb144204b6..e05884781b9 100644
--- a/src/include/commands/cluster.h
+++ b/src/include/commands/cluster.h
@@ -19,8 +19,7 @@
 
 
 extern void cluster(ClusterStmt *stmt, bool isTopLevel);
-extern void cluster_rel(Oid tableOid, Oid indexOid, int options,
-						bool isTopLevel);
+extern void cluster_rel(Oid tableOid, Oid indexOid, int options);
 extern void check_index_is_clusterable(Relation OldHeap, Oid indexOid,
 									   bool recheck, LOCKMODE lockmode);
 extern void mark_index_clustered(Relation rel, Oid indexOid, bool is_internal);
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index d9475c99890..a4cd7214009 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -267,7 +267,6 @@ extern void vacuum_set_xid_limits(Relation rel,
 								  int freeze_min_age, int freeze_table_age,
 								  int multixact_freeze_min_age,
 								  int multixact_freeze_table_age,
-								  bool isTopLevel,
 								  TransactionId *oldestXmin,
 								  TransactionId *freezeLimit,
 								  TransactionId *xidFullScanLimit,
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 4f2f38168dc..be5439dd9d8 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -471,7 +471,6 @@ heap_vacuum_rel(Relation onerel, VacuumParams *params,
 						  params->freeze_table_age,
 						  params->multixact_freeze_min_age,
 						  params->multixact_freeze_table_age,
-						  true, /* we must be a top-level command */
 						  &OldestXmin, &FreezeLimit, &xidFullScanLimit,
 						  &MultiXactCutoff, &mxactFullScanLimit);
 
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 0d647e912c4..04d12a7ece6 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -67,13 +67,10 @@ typedef struct
 } RelToCluster;
 
 
-static void rebuild_relation(Relation OldHeap, Oid indexOid,
-							 bool isTopLevel, bool verbose);
+static void rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose);
 static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
-							bool isTopLevel, bool verbose,
-							bool *pSwapToastByContent,
-							TransactionId *pFreezeXid,
-							MultiXactId *pCutoffMulti);
+							bool verbose, bool *pSwapToastByContent,
+							TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
 
 
@@ -173,7 +170,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 		table_close(rel, NoLock);
 
 		/* Do the job. */
-		cluster_rel(tableOid, indexOid, stmt->options, isTopLevel);
+		cluster_rel(tableOid, indexOid, stmt->options);
 	}
 	else
 	{
@@ -222,8 +219,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 			PushActiveSnapshot(GetTransactionSnapshot());
 			/* Do the job. */
 			cluster_rel(rvtc->tableOid, rvtc->indexOid,
-						stmt->options | CLUOPT_RECHECK,
-						isTopLevel);
+						stmt->options | CLUOPT_RECHECK);
 			PopActiveSnapshot();
 			CommitTransactionCommand();
 		}
@@ -254,7 +250,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
  * and error messages should refer to the operation as VACUUM not CLUSTER.
  */
 void
-cluster_rel(Oid tableOid, Oid indexOid, int options, bool isTopLevel)
+cluster_rel(Oid tableOid, Oid indexOid, int options)
 {
 	Relation	OldHeap;
 	bool		verbose = ((options & CLUOPT_VERBOSE) != 0);
@@ -404,7 +400,7 @@ cluster_rel(Oid tableOid, Oid indexOid, int options, bool isTopLevel)
 	TransferPredicateLocksToHeapRelation(OldHeap);
 
 	/* rebuild_relation does all the dirty work */
-	rebuild_relation(OldHeap, indexOid, isTopLevel, verbose);
+	rebuild_relation(OldHeap, indexOid, verbose);
 
 	/* NB: rebuild_relation does table_close() on OldHeap */
 
@@ -549,12 +545,11 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
  *
  * OldHeap: table to rebuild --- must be opened and exclusive-locked!
  * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
- * isTopLevel: should be passed down from ProcessUtility.
  *
  * NB: this routine closes OldHeap at the right time; caller should not.
  */
 static void
-rebuild_relation(Relation OldHeap, Oid indexOid, bool isTopLevel, bool verbose)
+rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose)
 {
 	Oid			tableOid = RelationGetRelid(OldHeap);
 	Oid			tableSpace = OldHeap->rd_rel->reltablespace;
@@ -582,7 +577,7 @@ rebuild_relation(Relation OldHeap, Oid indexOid, bool isTopLevel, bool verbose)
 							   AccessExclusiveLock);
 
 	/* Copy the heap data into the new table in the desired order */
-	copy_table_data(OIDNewHeap, tableOid, indexOid, isTopLevel, verbose,
+	copy_table_data(OIDNewHeap, tableOid, indexOid, verbose,
 					&swap_toast_by_content, &frozenXid, &cutoffMulti);
 
 	/*
@@ -733,8 +728,7 @@ make_new_heap(Oid OIDOldHeap, Oid NewTableSpace, char relpersistence,
  * *pCutoffMulti receives the MultiXactId used as a cutoff point.
  */
 static void
-copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
-				bool isTopLevel, bool verbose,
+copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
 				bool *pSwapToastByContent, TransactionId *pFreezeXid,
 				MultiXactId *pCutoffMulti)
 {
@@ -832,7 +826,7 @@ copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 	 * Since we're going to rewrite the whole table anyway, there's no reason
 	 * not to be aggressive about this.
 	 */
-	vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0, isTopLevel,
+	vacuum_set_xid_limits(OldHeap, 0, 0, 0, 0,
 						  &OldestXmin, &FreezeXid, NULL, &MultiXactCutoff,
 						  NULL);
 
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index ddeec870d81..1b6717f727e 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -907,8 +907,7 @@ get_all_vacuum_rels(int options)
 /*
  * vacuum_set_xid_limits() -- compute oldestXmin and freeze cutoff points
  *
- * Input parameters are the target relation, applicable freeze age settings,
- * and isTopLevel which should be passed down from ProcessUtility.
+ * Input parameters are the target relation and applicable freeze age settings.
  *
  * The output parameters are:
  * - oldestXmin is the cutoff value used to distinguish whether tuples are
@@ -934,7 +933,6 @@ vacuum_set_xid_limits(Relation rel,
 					  int freeze_table_age,
 					  int multixact_freeze_min_age,
 					  int multixact_freeze_table_age,
-					  bool isTopLevel,
 					  TransactionId *oldestXmin,
 					  TransactionId *freezeLimit,
 					  TransactionId *xidFullScanLimit,
@@ -950,53 +948,33 @@ vacuum_set_xid_limits(Relation rel,
 	MultiXactId mxactLimit;
 	MultiXactId safeMxactLimit;
 
-	if (RELATION_IS_LOCAL(rel) && !IsInTransactionBlock(isTopLevel))
-	{
-		/*
-		 * If we are processing a temp relation (which by prior checks must be
-		 * one belonging to our session), and we are not inside any
-		 * transaction block, then there can be no tuples in the rel that are
-		 * still in-doubt, nor can there be any that are dead but possibly
-		 * still interesting to some snapshot our session holds.  We don't
-		 * need to care whether other sessions could see such tuples, either.
-		 * So we can aggressively set the cutoff xmin to be the nextXid.
-		 */
-		*oldestXmin = ReadNewTransactionId();
-	}
-	else
-	{
-		/*
-		 * Otherwise, calculate the cutoff xmin normally.
-		 *
-		 * We can always ignore processes running lazy vacuum.  This is
-		 * because we use these values only for deciding which tuples we must
-		 * keep in the tables.  Since lazy vacuum doesn't write its XID
-		 * anywhere (usually no XID assigned), it's safe to ignore it.  In
-		 * theory it could be problematic to ignore lazy vacuums in a full
-		 * vacuum, but keep in mind that only one vacuum process can be
-		 * working on a particular table at any time, and that each vacuum is
-		 * always an independent transaction.
-		 */
-		*oldestXmin = GetOldestNonRemovableTransactionId(rel);
+	/*
+	 * We can always ignore processes running lazy vacuum.  This is because we
+	 * use these values only for deciding which tuples we must keep in the
+	 * tables.  Since lazy vacuum doesn't write its XID anywhere (usually no
+	 * XID assigned), it's safe to ignore it.  In theory it could be
+	 * problematic to ignore lazy vacuums in a full vacuum, but keep in mind
+	 * that only one vacuum process can be working on a particular table at
+	 * any time, and that each vacuum is always an independent transaction.
+	 */
+	*oldestXmin = GetOldestNonRemovableTransactionId(rel);
 
-		if (OldSnapshotThresholdActive())
+	if (OldSnapshotThresholdActive())
+	{
+		TransactionId limit_xmin;
+		TimestampTz limit_ts;
+
+		if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel,
+												&limit_xmin, &limit_ts))
 		{
-			TransactionId limit_xmin;
-			TimestampTz limit_ts;
-
-			if (TransactionIdLimitedForOldSnapshots(*oldestXmin, rel,
-													&limit_xmin, &limit_ts))
-			{
-				/*
-				 * TODO: We should only set the threshold if we are pruning on
-				 * the basis of the increased limits.  Not as crucial here as
-				 * it is for opportunistic pruning (which often happens at a
-				 * much higher frequency), but would still be a significant
-				 * improvement.
-				 */
-				SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
-				*oldestXmin = limit_xmin;
-			}
+			/*
+			 * TODO: We should only set the threshold if we are pruning on the
+			 * basis of the increased limits.  Not as crucial here as it is
+			 * for opportunistic pruning (which often happens at a much higher
+			 * frequency), but would still be a significant improvement.
+			 */
+			SetOldSnapshotThresholdTimestamp(limit_ts, limit_xmin);
+			*oldestXmin = limit_xmin;
 		}
 	}
 
@@ -1930,7 +1908,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
 			cluster_options |= CLUOPT_VERBOSE;
 
 		/* VACUUM FULL is now a variant of CLUSTER; see cluster.c */
-		cluster_rel(relid, InvalidOid, cluster_options, true);
+		cluster_rel(relid, InvalidOid, cluster_options);
 	}
 	else
 		table_relation_vacuum(onerel, params, vac_strategy);
diff --git a/src/backend/storage/ipc/procarray.c b/src/backend/storage/ipc/procarray.c
index 07c5eeb7495..ee4caa51152 100644
--- a/src/backend/storage/ipc/procarray.c
+++ b/src/backend/storage/ipc/procarray.c
@@ -131,7 +131,7 @@ typedef struct ProcArrayStruct
  * different types of relations. As e.g. a normal user defined table in one
  * database is inaccessible to backends connected to another database, a test
  * specific to a relation can be more aggressive than a test for a shared
- * relation.  Currently we track three different states:
+ * relation.  Currently we track four different states:
  *
  * 1) GlobalVisSharedRels, which only considers an XID's
  *    effects visible-to-everyone if neither snapshots in any database, nor a
@@ -153,6 +153,9 @@ typedef struct ProcArrayStruct
  *    I.e. the difference to GlobalVisCatalogRels is that
  *    replication slot's catalog_xmin is not taken into account.
  *
+ * 4) GlobalVisTempRels, which only considers the current session, as temp
+ *    tables are not visible to other sessions.
+ *
  * GlobalVisTestFor(relation) returns the appropriate state
  * for the relation.
  *
@@ -234,6 +237,13 @@ typedef struct ComputeXidHorizonsResult
 	 * defined tables.
 	 */
 	TransactionId data_oldest_nonremovable;
+
+	/*
+	 * Oldest xid for which deleted tuples need to be retained in this
+	 * session's temporary tables.
+	 */
+	TransactionId temp_oldest_nonremovable;
+
 } ComputeXidHorizonsResult;
 
 
@@ -257,12 +267,13 @@ static TransactionId standbySnapshotPendingXmin;
 
 /*
  * State for visibility checks on different types of relations. See struct
- * GlobalVisState for details. As shared, catalog, and user defined
+ * GlobalVisState for details. As shared, catalog, normal and temporary
  * relations can have different horizons, one such state exists for each.
  */
 static GlobalVisState GlobalVisSharedRels;
 static GlobalVisState GlobalVisCatalogRels;
 static GlobalVisState GlobalVisDataRels;
+static GlobalVisState GlobalVisTempRels;
 
 /*
  * This backend's RecentXmin at the last time the accurate xmin horizon was
@@ -1668,6 +1679,23 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
 		h->oldest_considered_running = initial;
 		h->shared_oldest_nonremovable = initial;
 		h->data_oldest_nonremovable = initial;
+
+		/*
+		 * Only modifications made by this backend affect the horizon for
+		 * temporary relations. Instead of a check in each iteration of the
+		 * loop over all PGPROCs, it is cheaper to just initialize to the
+		 * current top-level xid, if any.
+		 *
+		 * Without an assigned xid we could use a horizon as aggressive as
+		 * ReadNewTransactionId(), but we can get away with the much cheaper
+		 * latestCompletedXid + 1: if this backend has no xid, there can, by
+		 * definition, be no newer changes in its temp tables than
+		 * latestCompletedXid.
+		 */
+		if (TransactionIdIsValid(MyProc->xid))
+			h->temp_oldest_nonremovable = MyProc->xid;
+		else
+			h->temp_oldest_nonremovable = initial;
 	}
 
 	/*
@@ -1760,6 +1788,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
 			TransactionIdOlder(h->shared_oldest_nonremovable, kaxmin);
 		h->data_oldest_nonremovable =
 			TransactionIdOlder(h->data_oldest_nonremovable, kaxmin);
+		/* temp relations cannot be accessed in recovery */
 	}
 	else
 	{
@@ -1785,6 +1814,7 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
 		h->data_oldest_nonremovable =
 			TransactionIdRetreatedBy(h->data_oldest_nonremovable,
 									 vacuum_defer_cleanup_age);
+		/* defer doesn't apply to temp relations */
 	}
 
 	/*
@@ -1844,6 +1874,8 @@ ComputeXidHorizons(ComputeXidHorizonsResult *h)
 										 h->catalog_oldest_nonremovable));
 	Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
 										 h->data_oldest_nonremovable));
+	Assert(TransactionIdPrecedesOrEquals(h->oldest_considered_running,
+										 h->temp_oldest_nonremovable));
 	Assert(!TransactionIdIsValid(h->slot_xmin) ||
 		   TransactionIdPrecedesOrEquals(h->oldest_considered_running,
 										 h->slot_xmin));
@@ -1878,6 +1910,8 @@ GetOldestNonRemovableTransactionId(Relation rel)
 		return horizons.shared_oldest_nonremovable;
 	else if (RelationIsAccessibleInLogicalDecoding(rel))
 		return horizons.catalog_oldest_nonremovable;
+	else if (RELATION_IS_LOCAL(rel))
+		return horizons.temp_oldest_nonremovable;
 	else
 		return horizons.data_oldest_nonremovable;
 }
@@ -2054,8 +2088,8 @@ GetSnapshotDataReuse(Snapshot snapshot)
  *		RecentXmin: the xmin computed for the most recent snapshot.  XIDs
  *			older than this are known not running any more.
  *
- * And try to advance the bounds of GlobalVisSharedRels, GlobalVisCatalogRels,
- * GlobalVisDataRels for the benefit of theGlobalVisTest* family of functions.
+ * And try to advance the bounds of GlobalVis{Shared,Catalog,Data,Temp}Rels
+ * for the benefit of the GlobalVisTest* family of functions.
  *
  * Note: this function should probably not be called with an argument that's
  * not statically allocated (see xip allocation below).
@@ -2357,6 +2391,15 @@ GetSnapshotData(Snapshot snapshot)
 		GlobalVisDataRels.definitely_needed =
 			FullTransactionIdNewer(def_vis_fxid_data,
 								   GlobalVisDataRels.definitely_needed);
+		/* See temp_oldest_nonremovable computation in ComputeXidHorizons() */
+		if (TransactionIdIsNormal(myxid))
+			GlobalVisTempRels.definitely_needed =
+				FullXidRelativeTo(latest_completed, myxid);
+		else
+		{
+			GlobalVisTempRels.definitely_needed = latest_completed;
+			FullTransactionIdAdvance(&GlobalVisTempRels.definitely_needed);
+		}
 
 		/*
 		 * Check if we know that we can initialize or increase the lower
@@ -2375,6 +2418,8 @@ GetSnapshotData(Snapshot snapshot)
 		GlobalVisDataRels.maybe_needed =
 			FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
 								   oldestfxid);
+		/* accurate value known */
+		GlobalVisTempRels.maybe_needed = GlobalVisTempRels.definitely_needed;
 	}
 
 	RecentXmin = xmin;
@@ -3892,6 +3937,8 @@ GlobalVisTestFor(Relation rel)
 		state = &GlobalVisSharedRels;
 	else if (need_catalog)
 		state = &GlobalVisCatalogRels;
+	else if (RELATION_IS_LOCAL(rel))
+		state = &GlobalVisTempRels;
 	else
 		state = &GlobalVisDataRels;
 
@@ -3942,6 +3989,9 @@ GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
 	GlobalVisDataRels.maybe_needed =
 		FullXidRelativeTo(horizons->latest_completed,
 						  horizons->data_oldest_nonremovable);
+	GlobalVisTempRels.maybe_needed =
+		FullXidRelativeTo(horizons->latest_completed,
+						  horizons->temp_oldest_nonremovable);
 
 	/*
 	 * In longer running transactions it's possible that transactions we
@@ -3957,6 +4007,7 @@ GlobalVisUpdateApply(ComputeXidHorizonsResult *horizons)
 	GlobalVisDataRels.definitely_needed =
 		FullTransactionIdNewer(GlobalVisDataRels.maybe_needed,
 							   GlobalVisDataRels.definitely_needed);
+	GlobalVisTempRels.definitely_needed = GlobalVisTempRels.maybe_needed;
 
 	ComputeXidHorizonsResultLastXmin = RecentXmin;
 }
diff --git a/src/test/isolation/expected/horizons.out b/src/test/isolation/expected/horizons.out
new file mode 100644
index 00000000000..07bbc9832cd
--- /dev/null
+++ b/src/test/isolation/expected/horizons.out
@@ -0,0 +1,281 @@
+Parsed test spec with 2 sessions
+
+starting permutation: pruner_create_perm ll_start pruner_query_plan pruner_query pruner_query pruner_delete pruner_query pruner_query ll_commit pruner_drop
+step pruner_create_perm: 
+    CREATE TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off);
+    INSERT INTO horizons_tst(data) VALUES(1),(2);
+
+step ll_start: 
+    BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+    SELECT 1;
+
+?column?       
+
+1              
+step pruner_query_plan: 
+    EXPLAIN (COSTS OFF) SELECT * FROM horizons_tst ORDER BY data;
+
+QUERY PLAN     
+
+Index Only Scan using horizons_tst_data_key on horizons_tst
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_delete: 
+    DELETE FROM horizons_tst;
+
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step ll_commit: COMMIT;
+step pruner_drop: 
+    DROP TABLE horizons_tst;
+
+
+starting permutation: pruner_create_temp ll_start pruner_query_plan pruner_query pruner_query pruner_delete pruner_query pruner_query ll_commit pruner_drop
+step pruner_create_temp: 
+    CREATE TEMPORARY TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off);
+    INSERT INTO horizons_tst(data) VALUES(1),(2);
+
+step ll_start: 
+    BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+    SELECT 1;
+
+?column?       
+
+1              
+step pruner_query_plan: 
+    EXPLAIN (COSTS OFF) SELECT * FROM horizons_tst ORDER BY data;
+
+QUERY PLAN     
+
+Index Only Scan using horizons_tst_data_key on horizons_tst
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_delete: 
+    DELETE FROM horizons_tst;
+
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+0              
+step ll_commit: COMMIT;
+step pruner_drop: 
+    DROP TABLE horizons_tst;
+
+
+starting permutation: pruner_create_temp ll_start pruner_query pruner_query pruner_begin pruner_delete pruner_query pruner_query ll_commit pruner_commit pruner_drop
+step pruner_create_temp: 
+    CREATE TEMPORARY TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off);
+    INSERT INTO horizons_tst(data) VALUES(1),(2);
+
+step ll_start: 
+    BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+    SELECT 1;
+
+?column?       
+
+1              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_begin: BEGIN;
+step pruner_delete: 
+    DELETE FROM horizons_tst;
+
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step ll_commit: COMMIT;
+step pruner_commit: COMMIT;
+step pruner_drop: 
+    DROP TABLE horizons_tst;
+
+
+starting permutation: pruner_create_perm ll_start pruner_query pruner_query pruner_delete pruner_vacuum pruner_query pruner_query ll_commit pruner_drop
+step pruner_create_perm: 
+    CREATE TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off);
+    INSERT INTO horizons_tst(data) VALUES(1),(2);
+
+step ll_start: 
+    BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+    SELECT 1;
+
+?column?       
+
+1              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_delete: 
+    DELETE FROM horizons_tst;
+
+step pruner_vacuum: 
+    VACUUM horizons_tst;
+
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step ll_commit: COMMIT;
+step pruner_drop: 
+    DROP TABLE horizons_tst;
+
+
+starting permutation: pruner_create_temp ll_start pruner_query pruner_query pruner_delete pruner_vacuum pruner_query pruner_query ll_commit pruner_drop
+step pruner_create_temp: 
+    CREATE TEMPORARY TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off);
+    INSERT INTO horizons_tst(data) VALUES(1),(2);
+
+step ll_start: 
+    BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+    SELECT 1;
+
+?column?       
+
+1              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+2              
+step pruner_delete: 
+    DELETE FROM horizons_tst;
+
+step pruner_vacuum: 
+    VACUUM horizons_tst;
+
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+0              
+step pruner_query: 
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+
+?column?       
+
+0              
+step ll_commit: COMMIT;
+step pruner_drop: 
+    DROP TABLE horizons_tst;
+
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index aa386ab1a25..f2e752c4454 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -78,6 +78,7 @@ test: timeouts
 test: vacuum-concurrent-drop
 test: vacuum-conflict
 test: vacuum-skip-locked
+test: horizons
 test: predicate-hash
 test: predicate-gist
 test: predicate-gin
diff --git a/src/test/isolation/specs/horizons.spec b/src/test/isolation/specs/horizons.spec
new file mode 100644
index 00000000000..19cc0a086ae
--- /dev/null
+++ b/src/test/isolation/specs/horizons.spec
@@ -0,0 +1,165 @@
+# Test that pruning and vacuuming pay attention to concurrent sessions
+# in the right way. For normal relations that means that rows cannot
+# be pruned away if there's an older snapshot; in contrast, rows in
+# temporary tables should nearly always be prunable.
+
+setup {
+    CREATE OR REPLACE FUNCTION explain_json(p_query text)
+    RETURNS json
+    LANGUAGE plpgsql AS $$
+        DECLARE
+            v_ret json;
+        BEGIN
+            EXECUTE p_query INTO STRICT v_ret;
+            RETURN v_ret;
+        END;$$;
+}
+
+teardown {
+    DROP FUNCTION explain_json(text);
+}
+
+session "lifeline"
+
+# Start a transaction, force a snapshot to be held
+step "ll_start"
+{
+    BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+    SELECT 1;
+}
+
+step "ll_commit" { COMMIT; }
+
+
+session "pruner"
+
+setup
+{
+    SET enable_seqscan = false;
+    SET enable_indexscan = false;
+    SET enable_bitmapscan = false;
+}
+
+step "pruner_create_temp"
+{
+    CREATE TEMPORARY TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off);
+    INSERT INTO horizons_tst(data) VALUES(1),(2);
+}
+
+step "pruner_create_perm"
+{
+    CREATE TABLE horizons_tst (data int unique) WITH (autovacuum_enabled = off);
+    INSERT INTO horizons_tst(data) VALUES(1),(2);
+}
+
+# Temp tables cannot be dropped in the teardown, so just always do so
+# as part of the permutation
+step "pruner_drop"
+{
+    DROP TABLE horizons_tst;
+}
+
+step "pruner_delete"
+{
+    DELETE FROM horizons_tst;
+}
+
+step "pruner_begin" { BEGIN; }
+step "pruner_commit" { COMMIT; }
+
+step "pruner_vacuum"
+{
+    VACUUM horizons_tst;
+}
+
+# Show the heap fetches of an ordered index-only-scan (other plans
+# have been forbidden above) - that tells us how many non-killed leaf
+# entries there are.
+step "pruner_query"
+{
+    SELECT explain_json($$
+        EXPLAIN (FORMAT json, BUFFERS, ANALYZE)
+	  SELECT * FROM horizons_tst ORDER BY data;$$)->0->'Plan'->'Heap Fetches';
+}
+
+# Verify that the query plan still is an IOS
+step "pruner_query_plan"
+{
+    EXPLAIN (COSTS OFF) SELECT * FROM horizons_tst ORDER BY data;
+}
+
+
+# Show that with a permanent relation deleted rows cannot be pruned
+# away if there's a concurrent session still seeing the rows.
+permutation
+    "pruner_create_perm"
+    "ll_start"
+    "pruner_query_plan"
+    # Run the pruning-capable query twice: the first run has a chance to
+    # prune, the second will not perform heap fetches if the first one did.
+    "pruner_query"
+    "pruner_query"
+    "pruner_delete"
+    "pruner_query"
+    "pruner_query"
+    "ll_commit"
+    "pruner_drop"
+
+# Show that with a temporary relation deleted rows can be pruned away,
+# even if there's a concurrent session with a snapshot from before the
+# deletion. That's safe because the session with the older snapshot
+# cannot access the temporary table.
+permutation
+    "pruner_create_temp"
+    "ll_start"
+    "pruner_query_plan"
+    "pruner_query"
+    "pruner_query"
+    "pruner_delete"
+    "pruner_query"
+    "pruner_query"
+    "ll_commit"
+    "pruner_drop"
+
+# Verify that pruning in temporary relations doesn't remove rows still
+# visible in the current session
+permutation
+    "pruner_create_temp"
+    "ll_start"
+    "pruner_query"
+    "pruner_query"
+    "pruner_begin"
+    "pruner_delete"
+    "pruner_query"
+    "pruner_query"
+    "ll_commit"
+    "pruner_commit"
+    "pruner_drop"
+
+# Show that vacuum cannot remove deleted rows still visible to another
+# session's snapshot, when accessing a permanent table.
+permutation
+    "pruner_create_perm"
+    "ll_start"
+    "pruner_query"
+    "pruner_query"
+    "pruner_delete"
+    "pruner_vacuum"
+    "pruner_query"
+    "pruner_query"
+    "ll_commit"
+    "pruner_drop"
+
+# Show that vacuum can remove deleted rows still visible to another
+# session's snapshot, when accessing a temporary table.
+permutation
+    "pruner_create_temp"
+    "ll_start"
+    "pruner_query"
+    "pruner_query"
+    "pruner_delete"
+    "pruner_vacuum"
+    "pruner_query"
+    "pruner_query"
+    "ll_commit"
+    "pruner_drop"
-- 
2.28.0.651.g306ee63a70
