On Sat, Feb 11, 2012 at 4:46 AM, Noah Misch <n...@leadboat.com> wrote:

>> But I have to admit I'm intrigued by the idea of extending this to
>> other cases, if there's a reasonable way to do that.  For example, if
>> we could fix things up so that we don't see a table at all if it was
>> created after we took our snapshot, that would remove one of the
>> obstacles to marking pages bulk-loaded into that table with FrozenXID
>> and PD_ALL_VISIBLE from the get-go.  I think a lot of people would be
>> mighty happy about that.
>
> Exactly.
>
>> But the necessary semantics seem somewhat different.  For TRUNCATE,
>> you want to throw a serialization error; but is that also what you
>> want for CREATE TABLE?  Or do you just want it to appear empty?
>
> I think an error helps just as much there.  If you create a table and populate
> it with data in the same transaction, letting some snapshot see an empty table
> is an atomicity failure.
>
> Your comment illustrates a helpful point: this is just another kind of
> ordinary serialization failure, one that can happen at any isolation level.
> No serial transaction sequence can yield one of these errors.

Thanks Noah for drawing attention to this thread. I hadn't been
watching. As you say, this work would allow me to freeze rows at load
time and avoid the overhead of hint bit setting, which avoids
performance issues from hint bit setting in checksum patch.

I've reviewed this patch and it seems OK to me. Good work Marti.

v2 patch attached, updated to latest HEAD. Patch adds
* a GUC called strict_mvcc to isolate the new behaviour if required
* relvalidxid is reset by VACUUM/ANALYZE to avoid wraparound failure

At present this lacks docs for strict_mvcc and doesn't attempt to
handle CREATE TABLE case yet, but should be straightforward to do so.

Hint bit setting is in separate patch on other thread.

-- 
 Simon Riggs                   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index c910863..4387f49 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -73,7 +73,7 @@
 
 /* GUC variable */
 bool		synchronize_seqscans = true;
-
+bool		StrictMVCC = true;
 
 static HeapScanDesc heap_beginscan_internal(Relation relation,
 						Snapshot snapshot,
@@ -1175,6 +1175,24 @@ heap_beginscan_internal(Relation relation, Snapshot snapshot,
 	HeapScanDesc scan;
 
 	/*
+	 * If the snapshot is older than relvalidxmin then that either a table
+	 * has only recently been created or that a TRUNCATE has removed data
+	 * that we should still be able to see. Either way, we aren't allowed
+	 * to view the rows in StrictMVCC mode.
+	 */
+	if (StrictMVCC &&
+		TransactionIdIsValid(relation->rd_rel->relvalidxmin) &&
+		TransactionIdIsValid(snapshot->xmax) &&
+		NormalTransactionIdPrecedes(snapshot->xmax, relation->rd_rel->relvalidxmin))
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
+				 errmsg("canceling statement due to conflict with TRUNCATE TABLE on %s",
+						 NameStr(relation->rd_rel->relname)),
+				 errdetail("User query attempting to see row versions that have been removed.")));
+	}
+
+	/*
 	 * increment relation ref count while scanning relation
 	 *
 	 * This is just to make really sure the relcache entry won't go away while
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index aef410a..3f9bd5d 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -787,6 +787,7 @@ InsertPgClassTuple(Relation pg_class_desc,
 	values[Anum_pg_class_relhastriggers - 1] = BoolGetDatum(rd_rel->relhastriggers);
 	values[Anum_pg_class_relhassubclass - 1] = BoolGetDatum(rd_rel->relhassubclass);
 	values[Anum_pg_class_relfrozenxid - 1] = TransactionIdGetDatum(rd_rel->relfrozenxid);
+	values[Anum_pg_class_relvalidxmin - 1] = TransactionIdGetDatum(rd_rel->relvalidxmin);
 	if (relacl != (Datum) 0)
 		values[Anum_pg_class_relacl - 1] = relacl;
 	else
@@ -884,6 +885,7 @@ AddNewRelationTuple(Relation pg_class_desc,
 		new_rel_reltup->relfrozenxid = InvalidTransactionId;
 	}
 
+	new_rel_reltup->relvalidxmin = InvalidTransactionId;
 	new_rel_reltup->relowner = relowner;
 	new_rel_reltup->reltype = new_type_oid;
 	new_rel_reltup->reloftype = reloftype;
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index bfbe642..0d96a6a 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -2854,7 +2854,7 @@ reindex_index(Oid indexId, bool skip_constraint_checks)
 		}
 
 		/* We'll build a new physical relation for the index */
-		RelationSetNewRelfilenode(iRel, InvalidTransactionId);
+		RelationSetNewRelfilenode(iRel, InvalidTransactionId, InvalidTransactionId);
 
 		/* Initialize the index and rebuild */
 		/* Note: we do not need to re-establish pkey setting */
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index b40e57b..0578241 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -82,6 +82,8 @@ static int	elevel = -1;
 
 static MemoryContext anl_context = NULL;
 
+static TransactionId OldestXmin;
+
 static BufferAccessStrategy vac_strategy;
 
 
@@ -538,7 +540,8 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
 							totalrows,
 							visibilitymap_count(onerel),
 							hasindex,
-							InvalidTransactionId);
+							InvalidTransactionId,
+							OldestXmin);
 
 	/*
 	 * Same for indexes. Vacuum always scans all indexes, so if we're part of
@@ -558,6 +561,7 @@ do_analyze_rel(Relation onerel, VacuumStmt *vacstmt, bool inh)
 								totalindexrows,
 								0,
 								false,
+								InvalidTransactionId,
 								InvalidTransactionId);
 		}
 	}
@@ -1025,7 +1029,6 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
 	double		deadrows = 0;	/* # dead rows seen */
 	double		rowstoskip = -1;	/* -1 means not set yet */
 	BlockNumber totalblocks;
-	TransactionId OldestXmin;
 	BlockSamplerData bs;
 	double		rstate;
 
diff --git a/src/backend/commands/sequence.c b/src/backend/commands/sequence.c
index 1f95abc..ab4aec2 100644
--- a/src/backend/commands/sequence.c
+++ b/src/backend/commands/sequence.c
@@ -287,7 +287,7 @@ ResetSequence(Oid seq_relid)
 	 * Create a new storage file for the sequence.	We want to keep the
 	 * sequence's relfrozenxid at 0, since it won't contain any unfrozen XIDs.
 	 */
-	RelationSetNewRelfilenode(seq_rel, InvalidTransactionId);
+	RelationSetNewRelfilenode(seq_rel, InvalidTransactionId, InvalidTransactionId);
 
 	/*
 	 * Insert the modified tuple into the new storage file.
diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c
index cd4490a..aa9d2e9 100644
--- a/src/backend/commands/tablecmds.c
+++ b/src/backend/commands/tablecmds.c
@@ -19,6 +19,7 @@
 #include "access/reloptions.h"
 #include "access/relscan.h"
 #include "access/sysattr.h"
+#include "access/transam.h"
 #include "access/xact.h"
 #include "catalog/catalog.h"
 #include "catalog/dependency.h"
@@ -1121,7 +1122,7 @@ ExecuteTruncate(TruncateStmt *stmt)
 			 * as the relfilenode value. The old storage file is scheduled for
 			 * deletion at commit.
 			 */
-			RelationSetNewRelfilenode(rel, RecentXmin);
+			RelationSetNewRelfilenode(rel, RecentXmin, GetCurrentTransactionId());
 
 			heap_relid = RelationGetRelid(rel);
 			toast_relid = rel->rd_rel->reltoastrelid;
@@ -1132,7 +1133,12 @@ ExecuteTruncate(TruncateStmt *stmt)
 			if (OidIsValid(toast_relid))
 			{
 				rel = relation_open(toast_relid, AccessExclusiveLock);
-				RelationSetNewRelfilenode(rel, RecentXmin);
+				RelationSetNewRelfilenode(rel, RecentXmin, InvalidTransactionId);
+
+				/*
+				 * We don't need a cmin here because there can't be any open
+				 * cursors in the same session as the TRUNCATE command.
+				 */
 				heap_close(rel, NoLock);
 			}
 
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 353af50..7609e59 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -573,7 +573,8 @@ void
 vac_update_relstats(Relation relation,
 					BlockNumber num_pages, double num_tuples,
 					BlockNumber num_all_visible_pages,
-					bool hasindex, TransactionId frozenxid)
+					bool hasindex, TransactionId frozenxid,
+					TransactionId oldestxmin)
 {
 	Oid			relid = RelationGetRelid(relation);
 	Relation	rd;
@@ -647,6 +648,17 @@ vac_update_relstats(Relation relation,
 		dirty = true;
 	}
 
+	/*
+	 * Reset relvalidxmin if its old enough
+	 */
+	if (TransactionIdIsValid(oldestxmin) &&
+		TransactionIdIsNormal(pgcform->relvalidxmin) &&
+		TransactionIdPrecedes(pgcform->relvalidxmin, oldestxmin))
+	{
+		pgcform->relvalidxmin = InvalidTransactionId;
+		dirty = true;
+	}
+
 	/* If anything changed, write out the tuple. */
 	if (dirty)
 		heap_inplace_update(rd, ctup);
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index 2fc749e..46d6ab8 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -255,7 +255,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
 						new_rel_tuples,
 						new_rel_allvisible,
 						vacrelstats->hasindex,
-						new_frozen_xid);
+						new_frozen_xid,
+						OldestXmin);
 
 	/* report results to the stats collector, too */
 	pgstat_report_vacuum(RelationGetRelid(onerel),
@@ -1185,6 +1186,7 @@ lazy_cleanup_index(Relation indrel,
 							stats->num_index_tuples,
 							0,
 							false,
+							InvalidTransactionId,
 							InvalidTransactionId);
 
 	ereport(elevel,
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index a59950e..17303e8 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -2605,7 +2605,8 @@ RelationBuildLocalRelation(const char *relname,
  * the XIDs that will be put into the new relation contents.
  */
 void
-RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
+RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid,
+						  TransactionId validxmin)
 {
 	Oid			newrelfilenode;
 	RelFileNodeBackend newrnode;
@@ -2674,6 +2675,10 @@ RelationSetNewRelfilenode(Relation relation, TransactionId freezeXid)
 	}
 	classform->relfrozenxid = freezeXid;
 
+	if (validxmin != InvalidTransactionId &&
+		TransactionIdPrecedes(classform->relvalidxmin, validxmin))
+		classform->relvalidxmin = validxmin;
+
 	simple_heap_update(pg_class, &tuple->t_self, tuple);
 	CatalogUpdateIndexes(pg_class, tuple);
 
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 486bdcd..1561889 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1430,6 +1430,19 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
+		{"strict_mvcc", PGC_POSTMASTER, COMPAT_OPTIONS_PREVIOUS,
+			gettext_noop("Enables backward compatibility mode of strict MVCC "
+						 "for TRUNCATE and CREATE TABLE."),
+			gettext_noop("When enabled viewing a truncated or newly created table "
+						 "will throw a serialization error to prevent MVCC "
+						 "discrepancies that were possible priot to 9.2.")
+		},
+		&StrictMVCC,
+		true,
+		NULL, NULL, NULL
+	},
+
+	{
 		{"quote_all_identifiers", PGC_USERSET, COMPAT_OPTIONS_PREVIOUS,
 			gettext_noop("When generating SQL fragments, quote all identifiers."),
 			NULL,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 96da086..f56e374 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -548,6 +548,7 @@
 #quote_all_identifiers = off
 #sql_inheritance = on
 #standard_conforming_strings = on
+#strict_mvcc = on
 #synchronize_seqscans = on
 
 # - Other Platforms and Clients -
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 0335347..223f157 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	201203021
+#define CATALOG_VERSION_NO	201203031
 
 #endif
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index 1567206..d6ee70c 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -67,6 +67,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
 	bool		relhastriggers; /* has (or has had) any TRIGGERs */
 	bool		relhassubclass; /* has (or has had) derived classes */
 	TransactionId relfrozenxid; /* all Xids < this are frozen in this rel */
+	TransactionId relvalidxmin;	/* data is only valid for snapshots > this Xid */
 
 #ifdef CATALOG_VARLEN			/* variable-length fields start here */
 	/* NOTE: These fields are not present in a relcache entry's rd_rel field. */
@@ -77,7 +78,7 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
 
 /* Size of fixed part of pg_class tuples, not counting var-length fields */
 #define CLASS_TUPLE_SIZE \
-	 (offsetof(FormData_pg_class,relfrozenxid) + sizeof(TransactionId))
+	 (offsetof(FormData_pg_class,relvalidxmin) + sizeof(TransactionId))
 
 /* ----------------
  *		Form_pg_class corresponds to a pointer to a tuple with
@@ -91,7 +92,7 @@ typedef FormData_pg_class *Form_pg_class;
  * ----------------
  */
 
-#define Natts_pg_class					27
+#define Natts_pg_class					28
 #define Anum_pg_class_relname			1
 #define Anum_pg_class_relnamespace		2
 #define Anum_pg_class_reltype			3
@@ -117,8 +118,9 @@ typedef FormData_pg_class *Form_pg_class;
 #define Anum_pg_class_relhastriggers	23
 #define Anum_pg_class_relhassubclass	24
 #define Anum_pg_class_relfrozenxid		25
-#define Anum_pg_class_relacl			26
-#define Anum_pg_class_reloptions		27
+#define Anum_pg_class_relvalidxmin		26
+#define Anum_pg_class_relacl			27
+#define Anum_pg_class_reloptions		28
 
 /* ----------------
  *		initial contents of pg_class
@@ -130,13 +132,13 @@ typedef FormData_pg_class *Form_pg_class;
  */
 
 /* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId */
-DATA(insert OID = 1247 (  pg_type		PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1247 (  pg_type		PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f 3 0 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1249 (  pg_attribute	PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1249 (  pg_attribute	PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f 3 0 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1255 (  pg_proc		PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1255 (  pg_proc		PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 0 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1259 (  pg_class		PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 27 0 t f f f f 3 _null_ _null_ ));
+DATA(insert OID = 1259 (  pg_class		PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f 3 0 _null_ _null_ ));
 DESCR("");
 
 
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 4526648..fa79231 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -151,7 +151,8 @@ extern void vac_update_relstats(Relation relation,
 					double num_tuples,
 					BlockNumber num_all_visible_pages,
 					bool hasindex,
-					TransactionId frozenxid);
+					TransactionId frozenxid,
+					TransactionId oldestxmin);
 extern void vacuum_set_xid_limits(int freeze_min_age, int freeze_table_age,
 					  bool sharedRel,
 					  TransactionId *oldestXmin,
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 610cb59..9d2be31 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -342,6 +342,8 @@ extern ProcessingMode Mode;
 
 #define GetProcessingMode() Mode
 
+/* in access/heap/heapam.c */
+extern bool	StrictMVCC;
 
 /*****************************************************************************
  *	  pinit.h --															 *
diff --git a/src/include/utils/relcache.h b/src/include/utils/relcache.h
index 2f39486..60c2eb6 100644
--- a/src/include/utils/relcache.h
+++ b/src/include/utils/relcache.h
@@ -76,7 +76,8 @@ extern Relation RelationBuildLocalRelation(const char *relname,
  * Routine to manage assignment of new relfilenode to a relation
  */
 extern void RelationSetNewRelfilenode(Relation relation,
-						  TransactionId freezeXid);
+						  TransactionId freezeXid,
+						  TransactionId validxmin);
 
 /*
  * Routines for flushing/rebuilding relcache entries in various scenarios
diff --git a/src/test/isolation/isolation_schedule b/src/test/isolation/isolation_schedule
index 669c0f2..bba722a 100644
--- a/src/test/isolation/isolation_schedule
+++ b/src/test/isolation/isolation_schedule
@@ -14,3 +14,4 @@ test: fk-contention
 test: fk-deadlock
 test: fk-deadlock2
 test: eval-plan-qual
+test: truncate
diff --git a/src/test/isolation/expected/truncate.out b/src/test/isolation/expected/truncate.out
new file mode 100644
index ...2b9f161
*** a/src/test/isolation/expected/truncate.out
--- b/src/test/isolation/expected/truncate.out
***************
*** 0 ****
--- 1,63 ----
+ Parsed test spec with 2 sessions
+ 
+ starting permutation: begin select roll trunc
+ step begin: 
+   BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+   -- Force snapshot open
+   SELECT 1 AS foo FROM txid_current();
+ 
+ foo            
+ 
+ 1              
+ step select: SELECT * FROM foo;
+ i              
+ 
+ 1              
+ step roll: ROLLBACK;
+ step trunc: TRUNCATE TABLE foo;
+ 
+ starting permutation: begin select trunc roll
+ step begin: 
+   BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+   -- Force snapshot open
+   SELECT 1 AS foo FROM txid_current();
+ 
+ foo            
+ 
+ 1              
+ step select: SELECT * FROM foo;
+ i              
+ 
+ 1              
+ step trunc: TRUNCATE TABLE foo; <waiting ...>
+ step roll: ROLLBACK;
+ step trunc: <... completed>
+ 
+ starting permutation: begin trunc select roll
+ step begin: 
+   BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+   -- Force snapshot open
+   SELECT 1 AS foo FROM txid_current();
+ 
+ foo            
+ 
+ 1              
+ step trunc: TRUNCATE TABLE foo;
+ step select: SELECT * FROM foo;
+ ERROR:  canceling statement due to conflict with TRUNCATE TABLE on foo
+ step roll: ROLLBACK;
+ 
+ starting permutation: trunc begin select roll
+ step trunc: TRUNCATE TABLE foo;
+ step begin: 
+   BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+   -- Force snapshot open
+   SELECT 1 AS foo FROM txid_current();
+ 
+ foo            
+ 
+ 1              
+ step select: SELECT * FROM foo;
+ i              
+ 
+ step roll: ROLLBACK;
diff --git a/src/test/isolation/specs/truncate.spec b/src/test/isolation/specs/truncate.spec
new file mode 100644
index ...2c7b707
*** a/src/test/isolation/specs/truncate.spec
--- b/src/test/isolation/specs/truncate.spec
***************
*** 0 ****
--- 1,23 ----
+ setup
+ {
+   CREATE TABLE foo (i int);
+   INSERT INTO foo VALUES (1);
+ }
+ 
+ teardown
+ {
+   DROP TABLE foo;
+ }
+ 
+ session "s1"
+ step "begin"
+ {
+   BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
+   -- Force snapshot open
+   SELECT 1 AS foo FROM txid_current();
+ }
+ step "select"	{ SELECT * FROM foo; }
+ step "roll"		{ ROLLBACK; }
+ 
+ session "s2"
+ step "trunc"	{ TRUNCATE TABLE foo; }
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to