On Thu, Jul 16, 2015 at 8:51 PM, Sawada Masahiko <sawada.m...@gmail.com> wrote:
> On Wed, Jul 15, 2015 at 3:07 AM, Sawada Masahiko <sawada.m...@gmail.com> 
> wrote:
>> On Wed, Jul 15, 2015 at 12:55 AM, Simon Riggs <si...@2ndquadrant.com> wrote:
>>> On 10 July 2015 at 15:11, Sawada Masahiko <sawada.m...@gmail.com> wrote:
>>>>
>>>>
>>>> Oops, I had forgotten to add the new file heapfuncs.c.
>>>> Latest patch is attached.
>>>
>>>
>>> I think we've established the approach is desirable and defined the way
>>> forwards for this, so this is looking good.
>>
>> If we want to move stuff like pgstattuple, pg_freespacemap into core,
>> we could move them into heapfuncs.c.
>>
>>> Some of my requests haven't been actioned yet, so I personally would not
>>> commit this yet. I am happy to continue as reviewer/committer unless others
>>> wish to take over.
>>> The main missing item is pg_upgrade support, which won't happen by end of
>>> CF1, so I am marking this as Returned With Feedback. Hopefully we can review
>>> this again before CF2.
>>
>> I appreciate your reviewing.
>> Yeah, the pg_upgrade support and regression test for the VFM patch are
>> almost done now; I will submit the patch this week after testing it.
>
> Attached patch is latest v9 patch.
>
> I added:
> - regression test for visibility map (visibilitymap.sql and
> visibilitymap.out files)
> - pg_upgrade support (rewriting vm file to vfm file)
> - regression test for pg_upgrade
>

The previous patch failed to apply, so I have attached a rebased patch.
The catalog version is not decided yet, so we will need to update
VISIBILITY_MAP_FROZEN_BIT_CAT_VER in pg_upgrade.h.
Please review it.
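
For reviewers, here is a minimal standalone sketch of how the new
two-bit encoding is addressed. It assumes VISIBILITYMAP_ALL_VISIBLE =
0x01 and VISIBILITYMAP_ALL_FROZEN = 0x02 (which is what the lookup
tables in the attached visibilitymap.c imply); vm_test_bits is an
illustrative helper, not a function in the patch:

    #include <stdbool.h>
    #include <stdint.h>

    #define HEAPBLOCKS_PER_BYTE 4          /* two bits per heap block */
    #define BITS_PER_HEAPBLOCK  2
    #define VISIBILITYMAP_ALL_VISIBLE 0x01
    #define VISIBILITYMAP_ALL_FROZEN  0x02

    /* Test whether any of the requested bits are set for heapBlk. */
    static bool
    vm_test_bits(const uint8_t *map, uint32_t heapBlk, uint8_t flags)
    {
        uint32_t mapByte = heapBlk / HEAPBLOCKS_PER_BYTE;
        uint8_t  mapBit = heapBlk % HEAPBLOCKS_PER_BYTE;

        return (map[mapByte] & (flags << (BITS_PER_HEAPBLOCK * mapBit))) != 0;
    }

Heap block 5, for example, lives in map byte 1, two-bit slot 1, so its
all-frozen bit is bit 3 of that byte.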

Regards,

--
Masahiko Sawada
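
A note on the pg_upgrade part of the patch below: rewrite_vm_to_vfm_table
spreads each old one-bit-per-block byte into a two-bit-per-block uint16,
leaving every new all-frozen bit clear. A minimal standalone sketch that
regenerates the same 256 entries, assuming only that old bit i becomes
new bit 2*i:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        for (int byte = 0; byte < 256; byte++)
        {
            uint16_t vfm = 0;

            for (int bit = 0; bit < 8; bit++)
                if (byte & (1 << bit))
                    vfm |= (uint16_t) (1 << (2 * bit)); /* all-visible bit only */

            printf("%u,%c", vfm, (byte % 16 == 15) ? '\n' : ' ');
        }
        return 0;
    }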
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index 22c5f7a..b1b6a06 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -87,7 +87,7 @@ statapprox_heap(Relation rel, output_type *stat)
 		 * If the page has only visible tuples, then we can find out the free
 		 * space from the FSM and move on.
 		 */
-		if (visibilitymap_test(rel, blkno, &vmbuffer))
+		if (visibilitymap_test(rel, blkno, &vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
 		{
 			freespace = GetRecordedFreeSpace(rel, blkno);
 			stat->tuple_len += BLCKSZ - freespace;
diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile
index b83d496..806ce27 100644
--- a/src/backend/access/heap/Makefile
+++ b/src/backend/access/heap/Makefile
@@ -12,6 +12,7 @@ subdir = src/backend/access/heap
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
-OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o
+OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o \
+	heapfuncs.o
 
 include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 050efdc..2dbabc8 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2176,8 +2176,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 	CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
 
 	/*
-	 * Find buffer to insert this tuple into.  If the page is all visible,
-	 * this will also pin the requisite visibility map page.
+	 * Find buffer to insert this tuple into.  If the page is all visible
+	 * or all frozen, this will also pin the requisite visibility map
+	 * page.
 	 */
 	buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
 									   InvalidBuffer, options, bistate,
@@ -2192,7 +2193,11 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
 	if (PageIsAllVisible(BufferGetPage(buffer)))
 	{
 		all_visible_cleared = true;
+
+		/* all-frozen information is also cleared at the same time */
 		PageClearAllVisible(BufferGetPage(buffer));
+		PageClearAllFrozen(BufferGetPage(buffer));
+
 		visibilitymap_clear(relation,
 							ItemPointerGetBlockNumber(&(heaptup->t_self)),
 							vmbuffer);
@@ -2493,7 +2498,11 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
 		if (PageIsAllVisible(page))
 		{
 			all_visible_cleared = true;
+
+			/* all-frozen information is also cleared at the same time */
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+
 			visibilitymap_clear(relation,
 								BufferGetBlockNumber(buffer),
 								vmbuffer);
@@ -2776,9 +2785,9 @@ heap_delete(Relation relation, ItemPointer tid,
 
 	/*
 	 * If we didn't pin the visibility map page and the page has become all
-	 * visible while we were busy locking the buffer, we'll have to unlock and
-	 * re-lock, to avoid holding the buffer lock across an I/O.  That's a bit
-	 * unfortunate, but hopefully shouldn't happen often.
+	 * visible or all frozen while we were busy locking the buffer, we'll
+	 * have to unlock and re-lock, to avoid holding the buffer lock across an
+	 * I/O.  That's a bit unfortunate, but hopefully shouldn't happen often.
 	 */
 	if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
 	{
@@ -2970,10 +2979,15 @@ l1:
 	 */
 	PageSetPrunable(page, xid);
 
+	/* clear PD_ALL_VISIBLE and PD_ALL_FROZEN flags */
 	if (PageIsAllVisible(page))
 	{
 		all_visible_cleared = true;
+
+		/* all-frozen information is also cleared at the same time */
 		PageClearAllVisible(page);
+		PageClearAllFrozen(page);
+
 		visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
 							vmbuffer);
 	}
@@ -3252,7 +3266,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
 	 * in the middle of changing this, so we'll need to recheck after we have
 	 * the lock.
 	 */
-	if (PageIsAllVisible(page))
+	if (PageIsAllVisible(page) || PageIsAllFrozen(page))
 		visibilitymap_pin(relation, block, &vmbuffer);
 
 	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -3846,14 +3860,22 @@ l2:
 	if (PageIsAllVisible(BufferGetPage(buffer)))
 	{
 		all_visible_cleared = true;
+
+		/* all-frozen information is also cleared at the same time */
 		PageClearAllVisible(BufferGetPage(buffer));
+		PageClearAllFrozen(BufferGetPage(buffer));
+
 		visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
 							vmbuffer);
 	}
 	if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
 	{
 		all_visible_cleared_new = true;
+
+		/* all-frozen information is also cleared at the same time */
 		PageClearAllVisible(BufferGetPage(newbuf));
+		PageClearAllFrozen(BufferGetPage(newbuf));
+
 		visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
 							vmbuffer_new);
 	}
@@ -6938,7 +6960,7 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
  */
 XLogRecPtr
 log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
-				 TransactionId cutoff_xid)
+				 TransactionId cutoff_xid, uint8 vmflags)
 {
 	xl_heap_visible xlrec;
 	XLogRecPtr	recptr;
@@ -6948,6 +6970,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
 	Assert(BufferIsValid(vm_buffer));
 
 	xlrec.cutoff_xid = cutoff_xid;
+	xlrec.flags = vmflags;
 	XLogBeginInsert();
 	XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
 
@@ -7537,8 +7560,14 @@ heap_xlog_visible(XLogReaderState *record)
 		 * the subsequent update won't be replayed to clear the flag.
 		 */
 		page = BufferGetPage(buffer);
-		PageSetAllVisible(page);
+
+		if (xlrec->flags & VISIBILITYMAP_ALL_VISIBLE)
+			PageSetAllVisible(page);
+		if (xlrec->flags & VISIBILITYMAP_ALL_FROZEN)
+			PageSetAllFrozen(page);
+
 		MarkBufferDirty(buffer);
+
 	}
 	else if (action == BLK_RESTORED)
 	{
@@ -7589,7 +7618,7 @@ heap_xlog_visible(XLogReaderState *record)
 		 */
 		if (lsn > PageGetLSN(vmpage))
 			visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
-							  xlrec->cutoff_xid);
+							  xlrec->cutoff_xid, xlrec->flags);
 
 		ReleaseBuffer(vmbuffer);
 		FreeFakeRelcacheEntry(reln);
@@ -7739,7 +7768,10 @@ heap_xlog_delete(XLogReaderState *record)
 		PageSetPrunable(page, XLogRecGetXid(record));
 
 		if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
+		{
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+		}
 
 		/* Make sure there is no forward chain link in t_ctid */
 		htup->t_ctid = target_tid;
@@ -7843,7 +7875,10 @@ heap_xlog_insert(XLogReaderState *record)
 		PageSetLSN(page, lsn);
 
 		if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
+		{
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+		}
 
 		MarkBufferDirty(buffer);
 	}
@@ -7982,7 +8017,10 @@ heap_xlog_multi_insert(XLogReaderState *record)
 		PageSetLSN(page, lsn);
 
 		if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
+		{
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+		}
 
 		MarkBufferDirty(buffer);
 	}
@@ -8110,7 +8148,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
 		PageSetPrunable(page, XLogRecGetXid(record));
 
 		if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
+		{
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+		}
 
 		PageSetLSN(page, lsn);
 		MarkBufferDirty(obuffer);
@@ -8245,7 +8286,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
 			elog(PANIC, "heap_update_redo: failed to add tuple");
 
 		if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
+		{
 			PageClearAllVisible(page);
+			PageClearAllFrozen(page);
+		}
 
 		freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
 
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 7c38772..a284b85 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -21,33 +21,45 @@
  *
  * NOTES
  *
- * The visibility map is a bitmap with one bit per heap page. A set bit means
- * that all tuples on the page are known visible to all transactions, and
- * therefore the page doesn't need to be vacuumed. The map is conservative in
- * the sense that we make sure that whenever a bit is set, we know the
- * condition is true, but if a bit is not set, it might or might not be true.
+ * The visibility map is a bitmap with two bits (all-visible and all-frozen)
+ * per heap page. A set all-visible bit means that all tuples on the page are
+ * known visible to all transactions, and therefore the page doesn't need to
+ * be vacuumed. A set all-frozen bit means that all tuples on the page are
+ * completely frozen, and therefore the page doesn't need to be vacuumed even
+ * when a whole-table vacuum (e.g. an anti-wraparound vacuum) is required.
+ * The all-frozen bit must be set only when the page is already all-visible;
+ * that is, the all-frozen bit is always set together with the all-visible bit.
+ *
+ * The map is conservative in the sense that we make sure that whenever a bit
+ * is set, we know the condition is true, but if a bit is not set, it might or
+ * might not be true.
  *
  * Clearing a visibility map bit is not separately WAL-logged.  The callers
  * must make sure that whenever a bit is cleared, the bit is cleared on WAL
- * replay of the updating operation as well.
+ * replay of the updating operation as well.  The all-frozen bit must be
+ * cleared together with the all-visible bit.
  *
  * When we *set* a visibility map during VACUUM, we must write WAL.  This may
  * seem counterintuitive, since the bit is basically a hint: if it is clear,
- * it may still be the case that every tuple on the page is visible to all
- * transactions; we just don't know that for certain.  The difficulty is that
- * there are two bits which are typically set together: the PD_ALL_VISIBLE bit
- * on the page itself, and the visibility map bit.  If a crash occurs after the
- * visibility map page makes it to disk and before the updated heap page makes
- * it to disk, redo must set the bit on the heap page.  Otherwise, the next
- * insert, update, or delete on the heap page will fail to realize that the
- * visibility map bit must be cleared, possibly causing index-only scans to
- * return wrong answers.
+ * it may still be the case that every tuple on the page is visible to all
+ * transactions, or frozen; we just don't know that for certain.  The difficulty is
+ * that there are two bits which are typically set together: the PD_ALL_VISIBLE
+ * or PD_ALL_FROZEN bit on the page itself, and the visibility map bit.  If a
+ * crash occurs after the visibility map page makes it to disk and before the
+ * updated heap page makes it to disk, redo must set the bit on the heap page.
+ * Otherwise, the next insert, update, or delete on the heap page will fail to
+ * realize that the visibility map bit must be cleared, possibly causing index-only
+ * scans to return wrong answers.
  *
  * VACUUM will normally skip pages for which the visibility map bit is set;
  * such pages can't contain any dead tuples and therefore don't need vacuuming.
- * The visibility map is not used for anti-wraparound vacuums, because
+ * The visibility map is not used for anti-wraparound vacuums before 9.6, because
  * an anti-wraparound vacuum needs to freeze tuples and observe the latest xid
  * present in the table, even on pages that don't have any dead tuples.
+ * In 9.6 or later, the visibility map has an additional bit which indicates
+ * that all tuples on a single page have been completely frozen, so the
+ * visibility map is also used for anti-wraparound vacuums.
+ *
  *
  * LOCKING
  *
@@ -58,14 +70,14 @@
  * section that logs the page modification. However, we don't want to hold
  * the buffer lock over any I/O that may be required to read in the visibility
  * map page.  To avoid this, we examine the heap page before locking it;
- * if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map
- * bit.  Then, we lock the buffer.  But this creates a race condition: there
- * is a possibility that in the time it takes to lock the buffer, the
- * PD_ALL_VISIBLE bit gets set.  If that happens, we have to unlock the
- * buffer, pin the visibility map page, and relock the buffer.  This shouldn't
- * happen often, because only VACUUM currently sets visibility map bits,
- * and the race will only occur if VACUUM processes a given page at almost
- * exactly the same time that someone tries to further modify it.
+ * if the page-level PD_ALL_VISIBLE or PD_ALL_FROZEN bit is set, we pin the
+ * visibility map bit.  Then, we lock the buffer.  But this creates a race
+ * condition: there is a possibility that in the time it takes to lock the
+ * buffer, the PD_ALL_VISIBLE or PD_ALL_FROZEN bit gets set.  If that happens,
+ * we have to unlock the buffer, pin the visibility map page, and relock the
+ * buffer.  This shouldn't happen often, because only VACUUM currently sets
+ * visibility map bits, and the race will only occur if VACUUM processes a given
+ * page at almost exactly the same time that someone tries to further modify it.
  *
  * To set a bit, you need to hold a lock on the heap page. That prevents
  * the race condition where VACUUM sees that all tuples on the page are
@@ -101,11 +113,14 @@
  */
 #define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
 
-/* Number of bits allocated for each heap block. */
-#define BITS_PER_HEAPBLOCK 1
+/*
+ * Number of bits allocated for each heap block.
+ * One for all-visible, the other for all-frozen.
+ */
+#define BITS_PER_HEAPBLOCK 2
 
 /* Number of heap blocks we can represent in one byte. */
-#define HEAPBLOCKS_PER_BYTE 8
+#define HEAPBLOCKS_PER_BYTE 4
 
 /* Number of heap blocks we can represent in one visibility map page. */
 #define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
@@ -115,24 +130,42 @@
 #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
 #define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
 
-/* table for fast counting of set bits */
-static const uint8 number_of_ones[256] = {
-	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+/* tables for fast counting of set all-visible and all-frozen bits */
+static const uint8 number_of_ones_for_visible[256] = {
+	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+	1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+	2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
+};
+static const uint8 number_of_ones_for_frozen[256] = {
+	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+	0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+	2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
 };
 
 /* prototypes for internal routines */
@@ -141,7 +174,7 @@ static void vm_extend(Relation rel, BlockNumber nvmblocks);
 
 
 /*
- *	visibilitymap_clear - clear a bit in visibility map
+ *	visibilitymap_clear - clear all bits for one page in the visibility map
  *
  * You must pass a buffer containing the correct map page to this function.
  * Call visibilitymap_pin first to pin the right one. This function doesn't do
@@ -153,7 +186,8 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
 	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
 	int			mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
 	int			mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
-	uint8		mask = 1 << mapBit;
+	uint8		mask = (VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN) <<
+		(BITS_PER_HEAPBLOCK * mapBit);
 	char	   *map;
 
 #ifdef TRACE_VISIBILITYMAP
@@ -225,7 +259,7 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
 }
 
 /*
- *	visibilitymap_set - set a bit on a previously pinned page
+ *	visibilitymap_set - set bit(s) on a previously pinned page
  *
  * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
  * or InvalidXLogRecPtr in normal running.  The page LSN is advanced to the
@@ -234,10 +268,11 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
  * marked all-visible; it is needed for Hot Standby, and can be
  * InvalidTransactionId if the page contains no tuples.
  *
- * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
- * this function. Except in recovery, caller should also pass the heap
- * buffer. When checksums are enabled and we're not in recovery, we must add
- * the heap buffer to the WAL chain to protect it from being torn.
+ * Caller is expected to set the heap page's PD_ALL_VISIBLE or PD_ALL_FROZEN
+ * bit before calling this function. Except in recovery, caller should also
+ * pass the heap buffer and the flags indicating which bits to set.
+ * When checksums are enabled and we're not in recovery, we must add the heap
+ * buffer to the WAL chain to protect it from being torn.
  *
  * You must pass a buffer containing the correct map page to this function.
  * Call visibilitymap_pin first to pin the right one. This function doesn't do
@@ -245,7 +280,8 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
  */
 void
 visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
-				  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid)
+				  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
+				  uint8 flags)
 {
 	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
 	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
@@ -254,7 +290,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 	char	   *map;
 
 #ifdef TRACE_VISIBILITYMAP
-	elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
+	elog(DEBUG1, "vm_set %s %d %u", RelationGetRelationName(rel), heapBlk, flags);
 #endif
 
 	Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
@@ -272,11 +308,11 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 	map = PageGetContents(page);
 	LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);
 
-	if (!(map[mapByte] & (1 << mapBit)))
+	if (flags != ((map[mapByte] >> (BITS_PER_HEAPBLOCK * mapBit)) & flags))
 	{
 		START_CRIT_SECTION();
 
-		map[mapByte] |= (1 << mapBit);
+		map[mapByte] |= (flags << (BITS_PER_HEAPBLOCK * mapBit));
 		MarkBufferDirty(vmBuf);
 
 		if (RelationNeedsWAL(rel))
@@ -285,7 +321,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 			{
 				Assert(!InRecovery);
 				recptr = log_heap_visible(rel->rd_node, heapBuf, vmBuf,
-										  cutoff_xid);
+										  cutoff_xid, flags);
 
 				/*
 				 * If data checksums are enabled (or wal_log_hints=on), we
@@ -295,11 +331,15 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 				{
 					Page		heapPage = BufferGetPage(heapBuf);
 
-					/* caller is expected to set PD_ALL_VISIBLE first */
-					Assert(PageIsAllVisible(heapPage));
+					/*
+					 * caller is expected to set PD_ALL_VISIBLE or
+					 * PD_ALL_FROZEN first.
+					 */
+					Assert(PageIsAllVisible(heapPage) || PageIsAllFrozen(heapPage));
 					PageSetLSN(heapPage, recptr);
 				}
 			}
+
 			PageSetLSN(page, recptr);
 		}
 
@@ -310,15 +350,16 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
 }
 
 /*
- *	visibilitymap_test - test if a bit is set
+ *	visibilitymap_test - test whether the given bit(s) are set
  *
- * Are all tuples on heapBlk visible to all, according to the visibility map?
+ * Are all tuples on heapBlk visible to all (or all frozen), according to the visibility map?
  *
  * On entry, *buf should be InvalidBuffer or a valid buffer returned by an
  * earlier call to visibilitymap_pin or visibilitymap_test on the same
  * relation. On return, *buf is a valid buffer with the map page containing
  * the bit for heapBlk, or InvalidBuffer. The caller is responsible for
- * releasing *buf after it's done testing and setting bits.
+ * releasing *buf after it's done testing and setting bits.  The caller must
+ * pass flags indicating which bits to test.
  *
  * NOTE: This function is typically called without a lock on the heap page,
  * so somebody else could change the bit just after we look at it.  In fact,
@@ -328,7 +369,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
  * all concurrency issues!
  */
 bool
-visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
+visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf, uint8 flags)
 {
 	BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
 	uint32		mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
@@ -337,7 +378,7 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
 	char	   *map;
 
 #ifdef TRACE_VISIBILITYMAP
-	elog(DEBUG1, "vm_test %s %d", RelationGetRelationName(rel), heapBlk);
+	elog(DEBUG1, "vm_test %s %d %u", RelationGetRelationName(rel), heapBlk, flags);
 #endif
 
 	/* Reuse the old pinned buffer if possible */
@@ -360,11 +401,12 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
 	map = PageGetContents(BufferGetPage(*buf));
 
 	/*
-	 * A single-bit read is atomic.  There could be memory-ordering effects
+	 * A single-byte read, covering both bits, is atomic.  There could be memory-ordering effects
 	 * here, but for performance reasons we make it the caller's job to worry
 	 * about that.
 	 */
-	result = (map[mapByte] & (1 << mapBit)) ? true : false;
+	result = (map[mapByte] & (flags << (BITS_PER_HEAPBLOCK * mapBit))) ?
+		true : false;
 
 	return result;
 }
@@ -374,10 +416,11 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
  *
  * Note: we ignore the possibility of race conditions when the table is being
  * extended concurrently with the call.  New pages added to the table aren't
- * going to be marked all-visible, so they won't affect the result.
+ * going to be marked all-visible or all-frozen, so they won't affect the result.
+ * The caller must pass flags indicating which bits to count.
  */
 BlockNumber
-visibilitymap_count(Relation rel)
+visibilitymap_count(Relation rel, uint8 flags)
 {
 	BlockNumber result = 0;
 	BlockNumber mapBlock;
@@ -406,7 +449,10 @@ visibilitymap_count(Relation rel)
 
 		for (i = 0; i < MAPSIZE; i++)
 		{
-			result += number_of_ones[map[i]];
+			if (flags & VISIBILITYMAP_ALL_VISIBLE)
+				result += number_of_ones_for_visible[map[i]];
+			if (flags & VISIBILITYMAP_ALL_FROZEN)
+				result += number_of_ones_for_frozen[map[i]];
 		}
 
 		ReleaseBuffer(mapBuffer);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 69f35c9..87bf0c8 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1919,11 +1919,18 @@ index_update_stats(Relation rel,
 	{
 		BlockNumber relpages = RelationGetNumberOfBlocks(rel);
 		BlockNumber relallvisible;
+		BlockNumber relallfrozen;
 
 		if (rd_rel->relkind != RELKIND_INDEX)
-			relallvisible = visibilitymap_count(rel);
+		{
+			relallvisible = visibilitymap_count(rel, VISIBILITYMAP_ALL_VISIBLE);
+			relallfrozen = visibilitymap_count(rel, VISIBILITYMAP_ALL_FROZEN);
+		}
 		else	/* don't bother for indexes */
+		{
 			relallvisible = 0;
+			relallfrozen = 0;
+		}
 
 		if (rd_rel->relpages != (int32) relpages)
 		{
@@ -1940,6 +1947,11 @@ index_update_stats(Relation rel,
 			rd_rel->relallvisible = (int32) relallvisible;
 			dirty = true;
 		}
+		if (rd_rel->relallfrozen != (int32) relallfrozen)
+		{
+			rd_rel->relallfrozen = (int32) relallfrozen;
+			dirty = true;
+		}
 	}
 
 	/*
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 861048f..392c2a4 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -572,7 +572,8 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 		vac_update_relstats(onerel,
 							relpages,
 							totalrows,
-							visibilitymap_count(onerel),
+							visibilitymap_count(onerel, VISIBILITYMAP_ALL_VISIBLE),
+							visibilitymap_count(onerel, VISIBILITYMAP_ALL_FROZEN),
 							hasindex,
 							InvalidTransactionId,
 							InvalidMultiXactId,
@@ -595,6 +596,7 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
 								RelationGetNumberOfBlocks(Irel[ind]),
 								totalindexrows,
 								0,
+								0,
 								false,
 								InvalidTransactionId,
 								InvalidMultiXactId,
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 7ab4874..d3725dd 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -22,6 +22,7 @@
 #include "access/rewriteheap.h"
 #include "access/transam.h"
 #include "access/tuptoaster.h"
+#include "access/visibilitymap.h"
 #include "access/xact.h"
 #include "access/xlog.h"
 #include "catalog/catalog.h"
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index baf66f1..d68c7c4 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -744,6 +744,7 @@ void
 vac_update_relstats(Relation relation,
 					BlockNumber num_pages, double num_tuples,
 					BlockNumber num_all_visible_pages,
+					BlockNumber num_all_frozen_pages,
 					bool hasindex, TransactionId frozenxid,
 					MultiXactId minmulti,
 					bool in_outer_xact)
@@ -781,6 +782,11 @@ vac_update_relstats(Relation relation,
 		pgcform->relallvisible = (int32) num_all_visible_pages;
 		dirty = true;
 	}
+	if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
+	{
+		pgcform->relallfrozen = (int32) num_all_frozen_pages;
+		dirty = true;
+	}
 
 	/* Apply DDL updates, but not inside an outer transaction (see above) */
 
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index a01cfb4..120de63 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -106,6 +106,8 @@ typedef struct LVRelStats
 	BlockNumber rel_pages;		/* total number of pages */
 	BlockNumber scanned_pages;	/* number of pages we examined */
 	BlockNumber pinskipped_pages;		/* # of pages we skipped due to a pin */
+	BlockNumber vmskipped_frozen_pages; /* # of pages skipped thanks to the
+									all-frozen bit of the visibility map */
 	double		scanned_tuples; /* counts only tuples on scanned pages */
 	double		old_rel_tuples; /* previous value of pg_class.reltuples */
 	double		new_rel_tuples; /* new estimated total # of tuples */
@@ -156,7 +158,7 @@ static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
 static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
 static int	vac_cmp_itemptr(const void *left, const void *right);
 static bool heap_page_is_all_visible(Relation rel, Buffer buf,
-						 TransactionId *visibility_cutoff_xid);
+						 TransactionId *visibility_cutoff_xid, bool *all_frozen);
 
 
 /*
@@ -188,7 +190,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 	MultiXactId mxactFullScanLimit;
 	BlockNumber new_rel_pages;
 	double		new_rel_tuples;
-	BlockNumber new_rel_allvisible;
+	BlockNumber new_rel_allvisible,
+				new_rel_allfrozen;
 	double		new_live_tuples;
 	TransactionId new_frozen_xid;
 	MultiXactId new_min_multi;
@@ -222,6 +225,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 	 * than or equal to the requested Xid full-table scan limit; or if the
 	 * table's minimum MultiXactId is older than or equal to the requested
 	 * mxid full-table scan limit.
+	 * Even when scan_all is set, however, we can still skip pages that the
+	 * visibility map shows as all-frozen.
 	 */
 	scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
 											 xidFullScanLimit);
@@ -253,7 +258,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 	 * NB: We need to check this before truncating the relation, because that
 	 * will change ->rel_pages.
 	 */
-	if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
+	if ((vacrelstats->scanned_pages + vacrelstats->vmskipped_frozen_pages)
+		< vacrelstats->rel_pages)
 	{
 		Assert(!scan_all);
 		scanned_all = false;
@@ -301,10 +307,14 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 		new_rel_tuples = vacrelstats->old_rel_tuples;
 	}
 
-	new_rel_allvisible = visibilitymap_count(onerel);
+	new_rel_allvisible = visibilitymap_count(onerel, VISIBILITYMAP_ALL_VISIBLE);
 	if (new_rel_allvisible > new_rel_pages)
 		new_rel_allvisible = new_rel_pages;
 
+	new_rel_allfrozen = visibilitymap_count(onerel, VISIBILITYMAP_ALL_FROZEN);
+	if (new_rel_allfrozen > new_rel_pages)
+		new_rel_allfrozen = new_rel_pages;
+
 	new_frozen_xid = scanned_all ? FreezeLimit : InvalidTransactionId;
 	new_min_multi = scanned_all ? MultiXactCutoff : InvalidMultiXactId;
 
@@ -312,6 +322,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 						new_rel_pages,
 						new_rel_tuples,
 						new_rel_allvisible,
+						new_rel_allfrozen,
 						vacrelstats->hasindex,
 						new_frozen_xid,
 						new_min_multi,
@@ -360,10 +371,11 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
 							 get_namespace_name(RelationGetNamespace(onerel)),
 							 RelationGetRelationName(onerel),
 							 vacrelstats->num_index_scans);
-			appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins\n"),
+			appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped according to vm\n"),
 							 vacrelstats->pages_removed,
 							 vacrelstats->rel_pages,
-							 vacrelstats->pinskipped_pages);
+							 vacrelstats->pinskipped_pages,
+							 vacrelstats->vmskipped_frozen_pages);
 			appendStringInfo(&buf,
 							 _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable\n"),
 							 vacrelstats->tuples_deleted,
@@ -486,9 +498,12 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 	 * consecutive pages.  Since we're reading sequentially, the OS should be
 	 * doing readahead for us, so there's no gain in skipping a page now and
 	 * then; that's likely to disable readahead and so be counterproductive.
-	 * Also, skipping even a single page means that we can't update
-	 * relfrozenxid, so we only want to do it if we can skip a goodly number
-	 * of pages.
+	 * Also, skipping even a single page according to the all-visible bit of
+	 * the visibility map means that we can't update relfrozenxid, so we only
+	 * want to do it if we can skip a goodly number of pages.  On the other
+	 * hand, we count both the pages we skip according to the all-frozen bit
+	 * and the pages we freeze ourselves, so we can still update relfrozenxid
+	 * if their sum covers every page of the table.
 	 *
 	 * Before entering the main loop, establish the invariant that
 	 * next_not_all_visible_block is the next block number >= blkno that's not
@@ -515,7 +530,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		 next_not_all_visible_block < nblocks;
 		 next_not_all_visible_block++)
 	{
-		if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
+		if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer,
+								VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN))
 			break;
 		vacuum_delay_point();
 	}
@@ -533,7 +549,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		bool		tupgone,
 					hastup;
 		int			prev_dead_count;
-		int			nfrozen;
+		int			nfrozen; /* # of tuples we freeze here */
+		int			nalready_frozen; /* # of tuples already frozen */
+		int			ntotal_frozen; /* total # of frozen tuples on this page */
+		int			ntup_per_page;
 		Size		freespace;
 		bool		all_visible_according_to_vm;
 		bool		all_visible;
@@ -548,7 +567,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 				 next_not_all_visible_block++)
 			{
 				if (!visibilitymap_test(onerel, next_not_all_visible_block,
-										&vmbuffer))
+										&vmbuffer,
+										VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN))
 					break;
 				vacuum_delay_point();
 			}
@@ -566,9 +586,25 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		}
 		else
 		{
-			/* Current block is all-visible */
-			if (skipping_all_visible_blocks && !scan_all)
-				continue;
+			/*
+			 * This block is at least all-visible according to the visibility
+			 * map.  We check whether it is also all-frozen, so that we can
+			 * skip it even when a whole-table scan is required.
+			 */
+			if (scan_all)
+			{
+				if (visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_FROZEN))
+				{
+					vacrelstats->vmskipped_frozen_pages++;
+					continue;
+				}
+			}
+			else
+			{
+				if (skipping_all_visible_blocks)
+					continue;
+			}
+
 			all_visible_according_to_vm = true;
 		}
 
@@ -740,7 +776,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 
 				PageSetAllVisible(page);
 				visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
-								  vmbuffer, InvalidTransactionId);
+								  vmbuffer, InvalidTransactionId,
+								  VISIBILITYMAP_ALL_VISIBLE);
 				END_CRIT_SECTION();
 			}
 
@@ -764,6 +801,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		all_visible = true;
 		has_dead_tuples = false;
 		nfrozen = 0;
+		nalready_frozen = 0;
+		ntup_per_page = 0;
 		hastup = false;
 		prev_dead_count = vacrelstats->num_dead_tuples;
 		maxoff = PageGetMaxOffsetNumber(page);
@@ -918,8 +957,13 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 			else
 			{
 				num_tuples += 1;
+				ntup_per_page += 1;
 				hastup = true;
 
+				/* Check whether this tuple is already frozen or not */
+				if (HeapTupleHeaderXminFrozen(tuple.t_data))
+					nalready_frozen += 1;
+
 				/*
 				 * Each non-removable tuple must be checked to see if it needs
 				 * freezing.  Note we already have exclusive buffer lock.
@@ -931,9 +975,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		}						/* scan along page */
 
 		/*
-		 * If we froze any tuples, mark the buffer dirty, and write a WAL
-		 * record recording the changes.  We must log the changes to be
-		 * crash-safe against future truncation of CLOG.
+		 * If we froze any tuples, mark the buffer dirty and write a WAL
+		 * record recording the changes.  We must log the changes to be
+		 * crash-safe against future truncation of CLOG.  Tuples that were
+		 * already frozen are merely counted; they need no new WAL.
 		 */
 		if (nfrozen > 0)
 		{
@@ -966,6 +1011,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 			END_CRIT_SECTION();
 		}
 
+		/* Compute the total number of frozen tuples on this page */
+		ntotal_frozen = nfrozen + nalready_frozen;
+
 		/*
 		 * If there are no indexes then we can vacuum the page right now
 		 * instead of doing a second scan.
@@ -988,26 +1036,47 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 
 		freespace = PageGetHeapFreeSpace(page);
 
-		/* mark page all-visible, if appropriate */
-		if (all_visible && !all_visible_according_to_vm)
+		/* This page is all visible */
+		if (all_visible)
 		{
-			/*
-			 * It should never be the case that the visibility map page is set
-			 * while the page-level bit is clear, but the reverse is allowed
-			 * (if checksums are not enabled).  Regardless, set the both bits
-			 * so that we get back in sync.
-			 *
-			 * NB: If the heap page is all-visible but the VM bit is not set,
-			 * we don't need to dirty the heap page.  However, if checksums
-			 * are enabled, we do need to make sure that the heap page is
-			 * dirtied before passing it to visibilitymap_set(), because it
-			 * may be logged.  Given that this situation should only happen in
-			 * rare cases after a crash, it is not worth optimizing.
-			 */
-			PageSetAllVisible(page);
-			MarkBufferDirty(buf);
-			visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
-							  vmbuffer, visibility_cutoff_xid);
+			uint8 flags = 0;
+
+			/* mark page all-visible, if appropriate */
+			if (!all_visible_according_to_vm)
+			{
+				/*
+				 * It should never be the case that the visibility map page is set
+				 * while the page-level bit is clear, but the reverse is allowed
+				 * (if checksums are not enabled).  Regardless, set the both bits
+				 * so that we get back in sync.
+				 *
+				 * NB: If the heap page is all-visible but the VM bit is not set,
+				 * we don't need to dirty the heap page.  However, if checksums
+				 * are enabled, we do need to make sure that the heap page is
+				 * dirtied before passing it to visibilitymap_set(), because it
+				 * may be logged.  Given that this situation should only happen in
+				 * rare cases after a crash, it is not worth optimizing.
+				 */
+				PageSetAllVisible(page);
+				flags |= VISIBILITYMAP_ALL_VISIBLE;
+			}
+
+			/* mark page all-frozen, if all tuples turned out to be frozen */
+			if ((ntotal_frozen == ntup_per_page) &&
+				!visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_FROZEN))
+			{
+				Assert(PageIsAllVisible(page));
+
+				PageSetAllFrozen(page);
+				flags |= VISIBILITYMAP_ALL_FROZEN;
+			}
+
+			if (flags)
+			{
+				MarkBufferDirty(buf);
+				visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
+								  vmbuffer, visibility_cutoff_xid, flags);
+			}
 		}
 
 		/*
@@ -1018,7 +1087,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 		 * that something bad has happened.
 		 */
 		else if (all_visible_according_to_vm && !PageIsAllVisible(page)
-				 && visibilitymap_test(onerel, blkno, &vmbuffer))
+				 && visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
 		{
 			elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
 				 relname, blkno);
@@ -1047,6 +1116,17 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 			visibilitymap_clear(onerel, blkno, vmbuffer);
 		}
 
+		/*
+		 * If all tuples on this page turned out to be frozen, set the all-frozen
+		 * bit in both the page header and the visibility map.
+		 */
+		if (ntotal_frozen == ntup_per_page)
+		{
+			PageSetAllFrozen(page);
+			visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, vmbuffer,
+							  InvalidTransactionId, VISIBILITYMAP_ALL_FROZEN);
+		}
+
 		UnlockReleaseBuffer(buf);
 
 		/* Remember the location of the last page with nonremovable tuples */
@@ -1078,7 +1158,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 														 num_tuples);
 
 	/*
-	 * Release any remaining pin on visibility map page.
+	 * Release any remaining pin on the visibility map page.
 	 */
 	if (BufferIsValid(vmbuffer))
 	{
@@ -1115,6 +1195,14 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
 						tups_vacuumed, vacuumed_pages)));
 
 	/*
+	 * This information shows how effective the all-frozen bit of the
+	 * visibility map was at letting us skip already-frozen pages.
+	 */
+	ereport(elevel,
+			(errmsg("Skipped %d frozen pages acoording to visibility map",
+					vacrelstats->vmskipped_frozen_pages)));
+
+	/*
 	 * This is pretty messy, but we split it up so that we can skip emitting
 	 * individual parts of the message when not applicable.
 	 */
@@ -1226,6 +1314,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 	OffsetNumber unused[MaxOffsetNumber];
 	int			uncnt = 0;
 	TransactionId visibility_cutoff_xid;
+	bool		all_frozen;
 
 	START_CRIT_SECTION();
 
@@ -1277,19 +1366,31 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
 	 * dirty, exclusively locked, and, if needed, a full page image has been
 	 * emitted in the log_heap_clean() above.
 	 */
-	if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid))
+	if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid, &all_frozen))
 		PageSetAllVisible(page);
 
 	/*
 	 * All the changes to the heap page have been done. If the all-visible
-	 * flag is now set, also set the VM bit.
+	 * flag is now set, also set the VM all-visible bit.  Likewise, if the
+	 * page is all-frozen, set the page-level flag and the VM all-frozen bit.
 	 */
-	if (PageIsAllVisible(page) &&
-		!visibilitymap_test(onerel, blkno, vmbuffer))
+	if (PageIsAllVisible(page))
 	{
-		Assert(BufferIsValid(*vmbuffer));
-		visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
-						  visibility_cutoff_xid);
+		uint8 flags = 0;
+
+		if (!visibilitymap_test(onerel, blkno, vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
+			flags |= VISIBILITYMAP_ALL_VISIBLE;
+
+		/* mark page all-frozen, and set VM all-frozen bit */
+		if (all_frozen)
+		{
+			PageSetAllFrozen(page);
+			flags |= VISIBILITYMAP_ALL_FROZEN;
+		}
+
+		if (flags)
+			visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
+							  visibility_cutoff_xid, flags);
 	}
 
 	return tupindex;
@@ -1408,6 +1509,7 @@ lazy_cleanup_index(Relation indrel,
 							stats->num_pages,
 							stats->num_index_tuples,
 							0,
+							0,
 							false,
 							InvalidTransactionId,
 							InvalidMultiXactId,
@@ -1782,7 +1884,8 @@ vac_cmp_itemptr(const void *left, const void *right)
  * xmin amongst the visible tuples.
  */
 static bool
-heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid)
+heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid,
+						 bool *all_frozen)
 {
 	Page		page = BufferGetPage(buf);
 	BlockNumber blockno = BufferGetBlockNumber(buf);
@@ -1791,6 +1894,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
 	bool		all_visible = true;
 
 	*visibility_cutoff_xid = InvalidTransactionId;
+	*all_frozen = true;
 
 	/*
 	 * This is a stripped down version of the line pointer scan in
@@ -1814,7 +1918,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
 
 		/*
 		 * Dead line pointers can have index pointers pointing to them. So
-		 * they can't be treated as visible
+		 * they can't be treated as visible or frozen.
 		 */
 		if (ItemIdIsDead(itemid))
 		{
@@ -1855,6 +1959,10 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
 					/* Track newest xmin on page. */
 					if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
 						*visibility_cutoff_xid = xmin;
+
+					/* Check whether this tuple is already frozen or not */
+					if (!HeapTupleHeaderXminFrozen(tuple.t_data))
+						*all_frozen = false;
 				}
 				break;
 
@@ -1863,6 +1971,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
 			case HEAPTUPLE_INSERT_IN_PROGRESS:
 			case HEAPTUPLE_DELETE_IN_PROGRESS:
 				all_visible = false;
+				*all_frozen = false;
 				break;
 
 			default:
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 9f54c46..08df289 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -116,7 +116,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
 		 */
 		if (!visibilitymap_test(scandesc->heapRelation,
 								ItemPointerGetBlockNumber(tid),
-								&node->ioss_VMBuffer))
+								&node->ioss_VMBuffer, VISIBILITYMAP_ALL_VISIBLE))
 		{
 			/*
 			 * Rats, we have to visit the heap to check visibility.
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 1ef76d0..ee49ddf 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -127,7 +127,7 @@ ExecCheckPlanOutput(Relation resultRel, List *targetList)
 	if (attno != resultDesc->natts)
 		ereport(ERROR,
 				(errcode(ERRCODE_DATATYPE_MISMATCH),
-		  errmsg("table row type and query-specified row type do not match"),
+				 errmsg("table row type and query-specified row type do not match"),
 				 errdetail("Query has too few columns.")));
 }
 
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 79d9390..8fededc 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -10,6 +10,7 @@
 #include "postgres_fe.h"
 
 #include "pg_upgrade.h"
+#include "storage/bufpage.h"
 
 #include <fcntl.h>
 
@@ -21,6 +22,27 @@ static int	copy_file(const char *fromfile, const char *tofile, bool force);
 static int	win32_pghardlink(const char *src, const char *dst);
 #endif
 
+static int rewrite_vm_to_vfm(const char *fromfile, const char *tofile, bool force);
+
+/* table for fast rewriting of a vm file into a vfm file */
+static const uint16 rewrite_vm_to_vfm_table[256] = {
+	0,     1,     4,     5,     16,    17,    20,    21,    64,    65,    68,    69,    80,    81,    84,    85,
+	256,   257,   260,   261,   272,   273,   276,   277,   320,   321,   324,   325,   336,   337,   340,   341,
+	1024,  1025,  1028,  1029,  1040,  1041,  1044,  1045,  1088,  1089,  1092,  1093,  1104,  1105,  1108,  1109,
+	1280,  1281,  1284,  1285,  1296,  1297,  1300,  1301,  1344,  1345,  1348,  1349,  1360,  1361,  1364,  1365,
+	4096,  4097,  4100,  4101,  4112,  4113,  4116,  4117,  4160,  4161,  4164,  4165,  4176,  4177,  4180,  4181,
+	4352,  4353,  4356,  4357,  4368,  4369,  4372,  4373,  4416,  4417,  4420,  4421,  4432,  4433,  4436,  4437,
+	5120,  5121,  5124,  5125,  5136,  5137,  5140,  5141,  5184,  5185,  5188,  5189,  5200,  5201,  5204,  5205,
+	5376,  5377,  5380,  5381,  5392,  5393,  5396,  5397,  5440,  5441,  5444,  5445,  5456,  5457,  5460,  5461,
+	16384, 16385, 16388, 16389, 16400, 16401, 16404, 16405, 16448, 16449, 16452, 16453, 16464, 16465, 16468, 16469,
+	16640, 16641, 16644, 16645, 16656, 16657, 16660, 16661, 16704, 16705, 16708, 16709, 16720, 16721, 16724, 16725,
+	17408, 17409, 17412, 17413, 17424, 17425, 17428, 17429, 17472, 17473, 17476, 17477, 17488, 17489, 17492, 17493,
+	17664, 17665, 17668, 17669, 17680, 17681, 17684, 17685, 17728, 17729, 17732, 17733, 17744, 17745, 17748, 17749,
+	20480, 20481, 20484, 20485, 20496, 20497, 20500, 20501, 20544, 20545, 20548, 20549, 20560, 20561, 20564, 20565,
+	20736, 20737, 20740, 20741, 20752, 20753, 20756, 20757, 20800, 20801, 20804, 20805, 20816, 20817, 20820, 20821,
+	21504, 21505, 21508, 21509, 21520, 21521, 21524, 21525, 21568, 21569, 21572, 21573, 21584, 21585, 21588, 21589,
+	21760, 21761, 21764, 21765, 21776, 21777, 21780, 21781, 21824, 21825, 21828, 21829, 21840, 21841, 21844, 21845
+};
 
 /*
  * copyAndUpdateFile()
@@ -30,11 +52,19 @@ static int	win32_pghardlink(const char *src, const char *dst);
  */
 const char *
 copyAndUpdateFile(pageCnvCtx *pageConverter,
-				  const char *src, const char *dst, bool force)
+				  const char *src, const char *dst, bool force, bool rewrite_vm)
 {
+
 	if (pageConverter == NULL)
 	{
-		if (pg_copy_file(src, dst, force) == -1)
+		int ret;
+
+		if (rewrite_vm)
+			ret = rewrite_vm_to_vfm(src, dst, force);
+		else
+			ret = pg_copy_file(src, dst, force);
+
+		if (ret)
 			return getErrorText(errno);
 		else
 			return NULL;
@@ -99,7 +129,6 @@ copyAndUpdateFile(pageCnvCtx *pageConverter,
 	}
 }
 
-
 /*
  * linkAndUpdateFile()
  *
@@ -201,6 +230,110 @@ copy_file(const char *srcfile, const char *dstfile, bool force)
 #endif
 
 
+/*
+ * rewrite_vm_to_vfm()
+ *
+ * An additional bit indicating that all tuples on a page are completely
+ * frozen was added to the visibility map in PG 9.6, so the format of the
+ * visibility map has changed.  Copies a visibility map file while inserting
+ * a cleared all-frozen bit after each existing all-visible bit.
+ */
+static int
+rewrite_vm_to_vfm(const char *fromfile, const char *tofile, bool force)
+{
+#define REWRITE_BUF_SIZE (50 * BLCKSZ)
+#define BITS_PER_HEAPBLOCK 2
+
+	int			src_fd, dst_fd;
+	uint16 		vfm_bits;
+	ssize_t 	nbytes;
+	char 		*buffer;
+	int			ret = 0;
+	int			save_errno = 0;
+
+	if ((fromfile == NULL) || (tofile == NULL))
+	{
+		errno = EINVAL;
+		return -1;
+	}
+
+	if ((src_fd = open(fromfile, O_RDONLY, 0)) < 0)
+		return -1;
+
+	if ((dst_fd = open(tofile, O_RDWR | O_CREAT | (force ? 0 : O_EXCL), S_IRUSR | S_IWUSR)) < 0)
+	{
+		save_errno = errno;
+		if (src_fd != 0)
+			close(src_fd);
+
+		errno = save_errno;
+		return -1;
+	}
+
+	buffer = (char *) pg_malloc(REWRITE_BUF_SIZE);
+
+	/* Copy page header data in advance */
+	if ((nbytes = read(src_fd, buffer, MAXALIGN(SizeOfPageHeaderData))) <= 0)
+	{
+		save_errno = errno;
+		return -1;
+	}
+
+	if (write(dst_fd, buffer, nbytes) != nbytes)
+	{
+		/* if write didn't set errno, assume problem is no disk space */
+		if (errno == 0)
+			errno = ENOSPC;
+		save_errno = errno;
+		return -1;
+	}
+
+	/* perform the data rewriting, i.e. read from the source, write to the destination */
+	while (true)
+	{
+		ssize_t nbytes = read(src_fd, buffer, REWRITE_BUF_SIZE);
+		char *cur, *end;
+
+		if (nbytes < 0)
+		{
+			ret = -1;
+			break;
+		}
+
+		if (nbytes == 0)
+			break;
+
+		cur = buffer;
+		end = buffer + nbytes;
+
+		/*
+		 * Rewrite each source byte into BITS_PER_HEAPBLOCK output bytes.
+		 */
+		while (end > cur)
+		{
+			/* Look up the rewritten two-bit pattern for this byte */
+			vfm_bits = rewrite_vm_to_vfm_table[(uint8) *cur];
+
+			if (write(dst_fd, &vfm_bits, BITS_PER_HEAPBLOCK) != BITS_PER_HEAPBLOCK)
+			{
+				ret = -1;
+				break;
+			}
+			cur++;
+		}
+	}
+
+	pg_free(buffer);
+
+	if (src_fd != 0)
+		close(src_fd);
+
+	if (dst_fd != 0)
+		close(dst_fd);
+
+	return ret;
+}
+
 void
 check_hard_link(void)
 {
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 13aa891..d957581 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -112,6 +112,11 @@ extern char *output_files[];
 #define VISIBILITY_MAP_CRASHSAFE_CAT_VER 201107031
 
 /*
+ * The format of the visibility map changed with a 9.6 commit; the exact
+ * catalog version will be filled in at commit time.
+ */
+#define VISIBILITY_MAP_FROZEN_BIT_CAT_VER 201507161
+/*
  * pg_multixact format changed in 9.3 commit 0ac5ad5134f2769ccbaefec73844f85,
  * ("Improve concurrency of foreign key locking") which also updated catalog
  * version to this value.  pg_upgrade behavior depends on whether old and new
@@ -397,7 +402,7 @@ typedef void *pageCnvCtx;
 #endif
 
 const char *copyAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
-				  const char *dst, bool force);
+				  const char *dst, bool force, bool rewrite_vm);
 const char *linkAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
 				  const char *dst);
 
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index c22df42..766a473 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -18,7 +18,7 @@
 static void transfer_single_new_db(pageCnvCtx *pageConverter,
 					   FileNameMap *maps, int size, char *old_tablespace);
 static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
-				 const char *suffix);
+				 const char *type_old_suffix, const char *type_new_suffix);
 
 
 /*
@@ -171,6 +171,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
 {
 	int			mapnum;
 	bool		vm_crashsafe_match = true;
+	bool		vm_rewrite_needed = false;
 
 	/*
 	 * Do the old and new cluster disagree on the crash-safetiness of the vm
@@ -180,13 +181,20 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
 		new_cluster.controldata.cat_ver >= VISIBILITY_MAP_CRASHSAFE_CAT_VER)
 		vm_crashsafe_match = false;
 
+	/*
+	 * Do we need to rewrite "vm" files into "vfm" files?
+	 */
+	if (old_cluster.controldata.cat_ver < VISIBILITY_MAP_FROZEN_BIT_CAT_VER &&
+		new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER)
+		vm_rewrite_needed = true;
+
 	for (mapnum = 0; mapnum < size; mapnum++)
 	{
 		if (old_tablespace == NULL ||
 			strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
 		{
 			/* transfer primary file */
-			transfer_relfile(pageConverter, &maps[mapnum], "");
+			transfer_relfile(pageConverter, &maps[mapnum], "", "");
 
 			/* fsm/vm files added in PG 8.4 */
 			if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
@@ -194,9 +202,17 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
 				/*
 				 * Copy/link any fsm and vm files, if they exist
 				 */
-				transfer_relfile(pageConverter, &maps[mapnum], "_fsm");
+				transfer_relfile(pageConverter, &maps[mapnum], "_fsm", "_fsm");
 				if (vm_crashsafe_match)
-					transfer_relfile(pageConverter, &maps[mapnum], "_vm");
+				{
+					/*
+					 * The vm file is changed to a vfm file in PG 9.6.
+					 */
+					if (vm_rewrite_needed)
+						transfer_relfile(pageConverter, &maps[mapnum], "_vm", "_vfm");
+					else
+						transfer_relfile(pageConverter, &maps[mapnum], "_vm", "_vm");
+				}
 			}
 		}
 	}
@@ -210,7 +226,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
  */
 static void
 transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
-				 const char *type_suffix)
+				 const char *type_old_suffix, const char *type_new_suffix)
 {
 	const char *msg;
 	char		old_file[MAXPGPATH];
@@ -218,6 +234,7 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
 	int			fd;
 	int			segno;
 	char		extent_suffix[65];
+	bool		rewrite_vm = false;
 
 	/*
 	 * Now copy/link any related segments as well. Remember, PG breaks large
@@ -236,18 +253,18 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
 				 map->old_tablespace_suffix,
 				 map->old_db_oid,
 				 map->old_relfilenode,
-				 type_suffix,
+				 type_old_suffix,
 				 extent_suffix);
 		snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s",
 				 map->new_tablespace,
 				 map->new_tablespace_suffix,
 				 map->new_db_oid,
 				 map->new_relfilenode,
-				 type_suffix,
+				 type_new_suffix,
 				 extent_suffix);
 
 		/* Is it an extent, fsm, or vm file? */
-		if (type_suffix[0] != '\0' || segno != 0)
+		if (type_old_suffix[0] != '\0' || segno != 0)
 		{
 			/* Did file open fail? */
 			if ((fd = open(old_file, O_RDONLY, 0)) == -1)
@@ -276,7 +293,11 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
 		{
 			pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file);
 
-			if ((msg = copyAndUpdateFile(pageConverter, old_file, new_file, true)) != NULL)
+			/* Differing suffixes mean the vm file must be rewritten as vfm. */
+			if (strcmp(type_old_suffix, type_new_suffix) != 0)
+				rewrite_vm = true;
+
+			if ((msg = copyAndUpdateFile(pageConverter, old_file, new_file, true, rewrite_vm)) != NULL)
 				pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
 						 map->nspname, map->relname, old_file, new_file, msg);
 		}
diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh
index f4e5d9a..53b8b2f 100644
--- a/src/bin/pg_upgrade/test.sh
+++ b/src/bin/pg_upgrade/test.sh
@@ -171,6 +171,11 @@ if "$MAKE" -C "$oldsrc" installcheck; then
 		mv "$temp_root"/dump1.sql "$temp_root"/dump1.sql.orig
 		sed "s;$oldsrc;$newsrc;g" "$temp_root"/dump1.sql.orig >"$temp_root"/dump1.sql
 	fi
+
+	vm_sql="SELECT c.relname, c.relallvisible FROM pg_class as c, pg_namespace as n WHERE c.relnamespace = n.oid AND n.nspname NOT IN ('information_schema', 'pg_toast', 'pg_catalog') ORDER BY c.relname;"
+	# Test for rewriting visibility map
+	vacuumdb -d regression || visibilitymap_vacuum1_status=$?
+	psql -d regression -c "$vm_sql" > "$temp_root"/vm_test1.txt || visibilitymap_test1_status=$?
 else
 	make_installcheck_status=$?
 fi
@@ -185,6 +190,14 @@ if [ -n "$pg_dumpall1_status" ]; then
 	echo "pg_dumpall of pre-upgrade database cluster failed"
 	exit 1
 fi
+if [ -n "$visibilitymap_vacuum1_status" ];then
+	echo "VACUUM of pre-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
+if [ -n "$visibilitymap_test1_status" ];then
+	echo "SELECT of pre-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
 
 PGDATA=$BASE_PGDATA
 
@@ -200,6 +213,8 @@ case $testhost in
 esac
 
 pg_dumpall -f "$temp_root"/dump2.sql || pg_dumpall2_status=$?
+vacuumdb -d regression || visibilitymap_vacuum2_status=$?
+psql -d regression -c "$vm_sql" > "$temp_root"/vm_test2.txt || visibilitymap_test2_status=$?
 pg_ctl -m fast stop
 
 # no need to echo commands anymore
@@ -211,11 +226,26 @@ if [ -n "$pg_dumpall2_status" ]; then
 	exit 1
 fi
 
+if [ -n "$visibilitymap_vacuum2_status" ];then
+	echo "VACUUM of post-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
+
+if [ -n "$visibilitymap_test2_status" ];then
+	echo "SELECT of post-upgrade database cluster for visibility map test failed"
+	exit 1
+fi
+
 case $testhost in
 	MINGW*)	cmd /c delete_old_cluster.bat ;;
 	*)	    sh ./delete_old_cluster.sh ;;
 esac
 
+if ! diff "$temp_root"/vm_test1.txt "$temp_root"/vm_test2.txt >/dev/null; then
+	echo "Visibility map rewriting test failed"
+	exit 1
+fi
+
 if diff "$temp_root"/dump1.sql "$temp_root"/dump2.sql >/dev/null; then
 	echo PASSED
 	exit 0
diff --git a/src/common/relpath.c b/src/common/relpath.c
index 66dfef1..5898f1b 100644
--- a/src/common/relpath.c
+++ b/src/common/relpath.c
@@ -30,11 +30,14 @@
  * If you add a new entry, remember to update the errhint in
  * forkname_to_number() below, and update the SGML documentation for
  * pg_relation_size().
+ * In 9.6 or later, the visibility map fork name is changed from "vm" to
+ * "vfm" because the visibility map now carries not only all-visible
+ * information but also all-frozen information.
  */
 const char *const forkNames[] = {
 	"main",						/* MAIN_FORKNUM */
 	"fsm",						/* FSM_FORKNUM */
-	"vm",						/* VISIBILITYMAP_FORKNUM */
+	"vfm",						/* VISIBILITYMAP_FORKNUM */
 	"init"						/* INIT_FORKNUM */
 };
 
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index caa0f14..93afb10 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -320,9 +320,10 @@ typedef struct xl_heap_freeze_page
 typedef struct xl_heap_visible
 {
 	TransactionId cutoff_xid;
+	uint8		  flags;
 } xl_heap_visible;
 
-#define SizeOfHeapVisible (offsetof(xl_heap_visible, cutoff_xid) + sizeof(TransactionId))
+#define SizeOfHeapVisible (offsetof(xl_heap_visible, flags) + sizeof(uint8))
 
 typedef struct xl_heap_new_cid
 {
@@ -389,6 +390,6 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
 extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
 						  xl_heap_freeze_tuple *xlrec_tp);
 extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
-				 Buffer vm_buffer, TransactionId cutoff_xid);
+				 Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags);
 
 #endif   /* HEAPAM_XLOG_H */
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index 0c0e0ef..7270609 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -19,15 +19,20 @@
 #include "storage/buf.h"
 #include "utils/relcache.h"
 
-extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk,
-					Buffer vmbuf);
+/* Flags for visibility map bits */
+#define VISIBILITYMAP_ALL_VISIBLE	0x01
+#define VISIBILITYMAP_ALL_FROZEN	0x02
+
+extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf);
 extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
 				  Buffer *vmbuf);
 extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
 extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
-				  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid);
-extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
-extern BlockNumber visibilitymap_count(Relation rel);
+							  XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
+							  uint8 flags);
+extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf,
+							   uint8 flags);
+extern BlockNumber visibilitymap_count(Relation rel, uint8 flags);
 extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks);
 
 #endif   /* VISIBILITYMAP_H */
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index e526cd9..ea0f7c1 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -47,6 +47,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
 	float4		reltuples;		/* # of tuples (not always up-to-date) */
 	int32		relallvisible;	/* # of all-visible blocks (not always
 								 * up-to-date) */
+	int32		relallfrozen;	/* # of all-frozen blocks (not always
+								 * up-to-date) */
 	Oid			reltoastrelid;	/* OID of toast table; 0 if none */
 	bool		relhasindex;	/* T if has (or has had) any indexes */
 	bool		relisshared;	/* T if shared across databases */
@@ -95,7 +97,7 @@ typedef FormData_pg_class *Form_pg_class;
  * ----------------
  */
 
-#define Natts_pg_class					30
+#define Natts_pg_class					31
 #define Anum_pg_class_relname			1
 #define Anum_pg_class_relnamespace		2
 #define Anum_pg_class_reltype			3
@@ -107,25 +109,26 @@ typedef FormData_pg_class *Form_pg_class;
 #define Anum_pg_class_relpages			9
 #define Anum_pg_class_reltuples			10
 #define Anum_pg_class_relallvisible		11
-#define Anum_pg_class_reltoastrelid		12
-#define Anum_pg_class_relhasindex		13
-#define Anum_pg_class_relisshared		14
-#define Anum_pg_class_relpersistence	15
-#define Anum_pg_class_relkind			16
-#define Anum_pg_class_relnatts			17
-#define Anum_pg_class_relchecks			18
-#define Anum_pg_class_relhasoids		19
-#define Anum_pg_class_relhaspkey		20
-#define Anum_pg_class_relhasrules		21
-#define Anum_pg_class_relhastriggers	22
-#define Anum_pg_class_relhassubclass	23
-#define Anum_pg_class_relrowsecurity	24
-#define Anum_pg_class_relispopulated	25
-#define Anum_pg_class_relreplident		26
-#define Anum_pg_class_relfrozenxid		27
-#define Anum_pg_class_relminmxid		28
-#define Anum_pg_class_relacl			29
-#define Anum_pg_class_reloptions		30
+#define Anum_pg_class_relallfrozen		12
+#define Anum_pg_class_reltoastrelid		13
+#define Anum_pg_class_relhasindex		14
+#define Anum_pg_class_relisshared		15
+#define Anum_pg_class_relpersistence	16
+#define Anum_pg_class_relkind			17
+#define Anum_pg_class_relnatts			18
+#define Anum_pg_class_relchecks			19
+#define Anum_pg_class_relhasoids		20
+#define Anum_pg_class_relhaspkey		21
+#define Anum_pg_class_relhasrules		22
+#define Anum_pg_class_relhastriggers	23
+#define Anum_pg_class_relhassubclass	24
+#define Anum_pg_class_relrowsecurity	25
+#define Anum_pg_class_relispopulated	26
+#define Anum_pg_class_relreplident		27
+#define Anum_pg_class_relfrozenxid		28
+#define Anum_pg_class_relminmxid		29
+#define Anum_pg_class_relacl			30
+#define Anum_pg_class_reloptions		31
 
 /* ----------------
  *		initial contents of pg_class
@@ -140,13 +143,13 @@ typedef FormData_pg_class *Form_pg_class;
  * Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId;
  * similarly, "1" in relminmxid stands for FirstMultiXactId
  */
-DATA(insert OID = 1247 (  pg_type		PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1247 (  pg_type		PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1249 (  pg_attribute	PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1249 (  pg_attribute	PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1255 (  pg_proc		PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 28 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1255 (  pg_proc		PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f f t n 3 1 _null_ _null_ ));
 DESCR("");
-DATA(insert OID = 1259 (  pg_class		PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1259 (  pg_class		PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 31 0 t f f f f f t n 3 1 _null_ _null_ ));
 DESCR("");
 
 
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 09bf143..dbe16f3 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3213,6 +3213,11 @@ DESCR("sleep until the specified time");
 DATA(insert OID = 2971 (  text				PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "16" _null_ _null_ _null_ _null_ _null_ booltext _null_ _null_ _null_ ));
 DESCR("convert boolean to text");
 
+DATA(insert OID = 3298 (  pg_is_all_visible		PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 16 "2205 20" _null_ _null_ _null_ _null_ _null_ pg_is_all_visible _null_ _null_ _null_ ));
+DESCR("true if the page is all visible");
+DATA(insert OID = 3299 (  pg_is_all_frozen		PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 16 "2205 20" _null_ _null_ _null_ _null_ _null_ pg_is_all_frozen _null_ _null_ _null_ ));
+DESCR("true if the page is all frozen");
+
 /* Aggregates (moved here from pg_aggregate for 7.3) */
 
 DATA(insert OID = 2100 (  avg				PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 1700 "20" _null_ _null_ _null_ _null_ _null_	aggregate_dummy _null_ _null_ _null_ ));
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index e3a31af..d2bae2d 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -172,6 +172,7 @@ extern void vac_update_relstats(Relation relation,
 					BlockNumber num_pages,
 					double num_tuples,
 					BlockNumber num_all_visible_pages,
+					BlockNumber num_all_frozen_pages,
 					bool hasindex,
 					TransactionId frozenxid,
 					MultiXactId minmulti,
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index a2f78ee..7bf2718 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -178,8 +178,10 @@ typedef PageHeaderData *PageHeader;
 										 * tuple? */
 #define PD_ALL_VISIBLE		0x0004		/* all tuples on page are visible to
 										 * everyone */
+#define PD_ALL_FROZEN		0x0008		/* all tuples on page are completely
+										 * frozen */
 
-#define PD_VALID_FLAG_BITS	0x0007		/* OR of all valid pd_flags bits */
+#define PD_VALID_FLAG_BITS	0x000F		/* OR of all valid pd_flags bits */
 
 /*
  * Page layout version number 0 is for pre-7.3 Postgres releases.
@@ -369,6 +371,13 @@ typedef PageHeaderData *PageHeader;
 #define PageClearAllVisible(page) \
 	(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
 
+#define PageIsAllFrozen(page) \
+	(((PageHeader) (page))->pd_flags & PD_ALL_FROZEN)
+#define PageSetAllFrozen(page) \
+	(((PageHeader) (page))->pd_flags |= PD_ALL_FROZEN)
+#define PageClearAllFrozen(page) \
+	(((PageHeader) (page))->pd_flags &= ~PD_ALL_FROZEN)
+
 #define PageIsPrunable(page, oldestxmin) \
 ( \
 	AssertMacro(TransactionIdIsNormal(oldestxmin)), \
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 4df15de..893d773 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -108,5 +108,8 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid c
 # event triggers cannot run concurrently with any test that runs DDL
 test: event_trigger
 
+# the visibilitymap test cannot run concurrently with any other test, since concurrent activity can change visibility map contents
+test: visibilitymap
+
 # run stats by itself because its delay may be insufficient under heavy load
 test: stats
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 15d74d4..da84aa6 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -157,3 +157,4 @@ test: with
 test: xml
 test: event_trigger
 test: stats
+test: visibilitymap
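
For reviewers, here is a minimal psql session exercising the new pg_class
column and the two functions added to pg_proc above (a sketch only; it
assumes a server built with this patch, and the table name "t" is
arbitrary):

  -- Create a small table and freeze it so both visibility map bits are set.
  CREATE TABLE t (c int);
  INSERT INTO t SELECT generate_series(1, 1000);
  VACUUM FREEZE t;

  -- relallfrozen is the new pg_class column; after VACUUM FREEZE it should
  -- match relallvisible.
  SELECT relname, relallvisible, relallfrozen
  FROM pg_class WHERE relname = 't';

  -- Per-block checks via the new functions (block 0 of table t); both
  -- should return true after the freeze.
  SELECT pg_is_all_visible('t'::regclass, 0);
  SELECT pg_is_all_frozen('t'::regclass, 0);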
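
And since the fork name changes from "vm" to "vfm", anything that addresses
the fork by name, e.g. pg_relation_size(), needs the new spelling (again a
sketch under the same assumptions):

  -- The visibility map fork is now addressed as 'vfm'; the old 'vm'
  -- spelling should be rejected as an invalid fork name with this patch.
  SELECT pg_relation_size('t'::regclass, 'vfm');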