On Thu, Jul 16, 2015 at 8:51 PM, Sawada Masahiko <[email protected]> wrote:
> On Wed, Jul 15, 2015 at 3:07 AM, Sawada Masahiko <[email protected]>
> wrote:
>> On Wed, Jul 15, 2015 at 12:55 AM, Simon Riggs <[email protected]> wrote:
>>> On 10 July 2015 at 15:11, Sawada Masahiko <[email protected]> wrote:
>>>>
>>>>
>>>> Oops, I had forgotten to add new file heapfuncs.c.
>>>> Latest patch is attached.
>>>
>>>
>>> I think we've established the approach is desirable and defined the way
>>> forwards for this, so this is looking good.
>>
>> If we want to move stuff like pg_stattuple, pg_freespacemap into core,
>> we could move them into heapfuncs.c.
>>
>>> Some of my requests haven't been actioned yet, so I personally would not
>>> commit this yet. I am happy to continue as reviewer/committer unless others
>>> wish to take over.
>>> The main missing item is pg_upgrade support, which won't happen by end of
>>> CF1, so I am marking this as Returned With Feedback. Hopefully we can review
>>> this again before CF2.
>>
>> I appreciate your reviewing.
>> Yeah, the pg_upgrade support and regression test for the VFM patch are
>> almost done now; I will submit the patch this week after testing it.
>
> Attached patch is latest v9 patch.
>
> I added:
> - regression test for visibility map (visibilitymap.sql and
> visibilitymap.out files)
> - pg_upgrade support (rewriting vm file to vfm file)
> - regression test for pg_upgrade
>
Previous patch has some fail to apply, so attached the rebased patch.
Catalog version is not decided yet, so we will need to rewrite
VISIBILITY_MAP_FROZEN_BIT_CAT_VER in pg_upgrade.h
Please review it.
Regards,
--
Masahiko Sawada
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index 22c5f7a..b1b6a06 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -87,7 +87,7 @@ statapprox_heap(Relation rel, output_type *stat)
* If the page has only visible tuples, then we can find out the free
* space from the FSM and move on.
*/
- if (visibilitymap_test(rel, blkno, &vmbuffer))
+ if (visibilitymap_test(rel, blkno, &vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
{
freespace = GetRecordedFreeSpace(rel, blkno);
stat->tuple_len += BLCKSZ - freespace;
diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile
index b83d496..806ce27 100644
--- a/src/backend/access/heap/Makefile
+++ b/src/backend/access/heap/Makefile
@@ -12,6 +12,7 @@ subdir = src/backend/access/heap
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o
+OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o \
+ heapfuncs.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 050efdc..2dbabc8 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -2176,8 +2176,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
/*
- * Find buffer to insert this tuple into. If the page is all visible,
- * this will also pin the requisite visibility map page.
+ * Find buffer to insert this tuple into. If the page is all visible
+ * or all frozen, this will also pin the requisite visibility map and
+ * frozen map page.
*/
buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
InvalidBuffer, options, bistate,
@@ -2192,7 +2193,11 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
if (PageIsAllVisible(BufferGetPage(buffer)))
{
all_visible_cleared = true;
+
+ /* all-frozen information is also cleared at the same time */
PageClearAllVisible(BufferGetPage(buffer));
+ PageClearAllFrozen(BufferGetPage(buffer));
+
visibilitymap_clear(relation,
ItemPointerGetBlockNumber(&(heaptup->t_self)),
vmbuffer);
@@ -2493,7 +2498,11 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
if (PageIsAllVisible(page))
{
all_visible_cleared = true;
+
+ /* all-frozen information is also cleared at the same time */
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+
visibilitymap_clear(relation,
BufferGetBlockNumber(buffer),
vmbuffer);
@@ -2776,9 +2785,9 @@ heap_delete(Relation relation, ItemPointer tid,
/*
* If we didn't pin the visibility map page and the page has become all
- * visible while we were busy locking the buffer, we'll have to unlock and
- * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
- * unfortunate, but hopefully shouldn't happen often.
+ * visible or all frozen while we were busy locking the buffer, we'll
+ * have to unlock and re-lock, to avoid holding the buffer lock across an
+ * I/O. That's a bit unfortunate, but hopefully shouldn't happen often.
*/
if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
{
@@ -2970,10 +2979,15 @@ l1:
*/
PageSetPrunable(page, xid);
+ /* clear PD_ALL_VISIBLE and PD_ALL_FROZEN flags */
if (PageIsAllVisible(page))
{
all_visible_cleared = true;
+
+ /* all-frozen information is also cleared at the same time */
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+
visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
vmbuffer);
}
@@ -3252,7 +3266,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
* in the middle of changing this, so we'll need to recheck after we have
* the lock.
*/
- if (PageIsAllVisible(page))
+ if (PageIsAllVisible(page) || PageIsAllFrozen(page))
visibilitymap_pin(relation, block, &vmbuffer);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -3846,14 +3860,22 @@ l2:
if (PageIsAllVisible(BufferGetPage(buffer)))
{
all_visible_cleared = true;
+
+ /* all-frozen information is also cleared at the same time */
PageClearAllVisible(BufferGetPage(buffer));
+ PageClearAllFrozen(BufferGetPage(buffer));
+
visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
vmbuffer);
}
if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
{
all_visible_cleared_new = true;
+
+ /* all-frozen information is also cleared at the same time */
PageClearAllVisible(BufferGetPage(newbuf));
+ PageClearAllFrozen(BufferGetPage(newbuf));
+
visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
vmbuffer_new);
}
@@ -6938,7 +6960,7 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
*/
XLogRecPtr
log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
- TransactionId cutoff_xid)
+ TransactionId cutoff_xid, uint8 vmflags)
{
xl_heap_visible xlrec;
XLogRecPtr recptr;
@@ -6948,6 +6970,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
Assert(BufferIsValid(vm_buffer));
xlrec.cutoff_xid = cutoff_xid;
+ xlrec.flags = vmflags;
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
@@ -7537,8 +7560,14 @@ heap_xlog_visible(XLogReaderState *record)
* the subsequent update won't be replayed to clear the flag.
*/
page = BufferGetPage(buffer);
- PageSetAllVisible(page);
+
+ if (xlrec->flags & VISIBILITYMAP_ALL_VISIBLE)
+ PageSetAllVisible(page);
+ if (xlrec->flags & VISIBILITYMAP_ALL_FROZEN)
+ PageSetAllFrozen(page);
+
MarkBufferDirty(buffer);
+
}
else if (action == BLK_RESTORED)
{
@@ -7589,7 +7618,7 @@ heap_xlog_visible(XLogReaderState *record)
*/
if (lsn > PageGetLSN(vmpage))
visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
- xlrec->cutoff_xid);
+ xlrec->cutoff_xid, xlrec->flags);
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
@@ -7739,7 +7768,10 @@ heap_xlog_delete(XLogReaderState *record)
PageSetPrunable(page, XLogRecGetXid(record));
if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
+ {
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+ }
/* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = target_tid;
@@ -7843,7 +7875,10 @@ heap_xlog_insert(XLogReaderState *record)
PageSetLSN(page, lsn);
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
+ {
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+ }
MarkBufferDirty(buffer);
}
@@ -7982,7 +8017,10 @@ heap_xlog_multi_insert(XLogReaderState *record)
PageSetLSN(page, lsn);
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
+ {
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+ }
MarkBufferDirty(buffer);
}
@@ -8110,7 +8148,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
PageSetPrunable(page, XLogRecGetXid(record));
if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
+ {
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+ }
PageSetLSN(page, lsn);
MarkBufferDirty(obuffer);
@@ -8245,7 +8286,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
elog(PANIC, "heap_update_redo: failed to add tuple");
if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
+ {
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+ }
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 7c38772..a284b85 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -21,33 +21,45 @@
*
* NOTES
*
- * The visibility map is a bitmap with one bit per heap page. A set bit means
- * that all tuples on the page are known visible to all transactions, and
- * therefore the page doesn't need to be vacuumed. The map is conservative in
- * the sense that we make sure that whenever a bit is set, we know the
- * condition is true, but if a bit is not set, it might or might not be true.
+ * The visibility map is a bitmap with two bits (all-visible and all-frozen)
+ * per heap page. A set all-visible bit means that all tuples on the page are
+ * known visible to all transactions, and therefore the page doesn't need to
+ * be vacuumed. A set all-frozen bit means that all tuples on the page are
+ * completely frozen, and therefore the page doesn't need to be vacuumed even
+ * if whole table scanning vacuum is required (e.g. anti-wraparound vacuum).
+ * An all-frozen bit must be set only when the page is already all-visible.
+ * That is, the all-frozen bit is always set together with the all-visible bit.
+ *
+ * The map is conservative in the sense that we make sure that whenever a bit
+ * is set, we know the condition is true, but if a bit is not set, it might or
+ * might not be true.
*
* Clearing a visibility map bit is not separately WAL-logged. The callers
* must make sure that whenever a bit is cleared, the bit is cleared on WAL
- * replay of the updating operation as well.
+ * replay of the updating operation as well. And all-frozen bit must be
+ * cleared with all-visible at the same time.
*
* When we *set* a visibility map during VACUUM, we must write WAL. This may
* seem counterintuitive, since the bit is basically a hint: if it is clear,
- * it may still be the case that every tuple on the page is visible to all
- * transactions; we just don't know that for certain. The difficulty is that
- * there are two bits which are typically set together: the PD_ALL_VISIBLE bit
- * on the page itself, and the visibility map bit. If a crash occurs after the
- * visibility map page makes it to disk and before the updated heap page makes
- * it to disk, redo must set the bit on the heap page. Otherwise, the next
- * insert, update, or delete on the heap page will fail to realize that the
- * visibility map bit must be cleared, possibly causing index-only scans to
- * return wrong answers.
+ * it may still be the case that every tuple on the page is visible or frozen
+ * to all transactions; we just don't know that for certain. The difficulty is
+ * that there are two bits which are typically set together: the PD_ALL_VISIBLE
+ * or PD_ALL_FROZEN bit on the page itself, and the visibility map bit. If a
+ * crash occurs after the visibility map page makes it to disk and before the
+ * updated heap page makes it to disk, redo must set the bit on the heap page.
+ * Otherwise, the next insert, update, or delete on the heap page will fail to
+ * realize that the visibility map bit must be cleared, possibly causing index-only
+ * scans to return wrong answers.
*
* VACUUM will normally skip pages for which the visibility map bit is set;
* such pages can't contain any dead tuples and therefore don't need vacuuming.
- * The visibility map is not used for anti-wraparound vacuums, because
+ * The visibility map is not used for anti-wraparound vacuums before 9.6, because
* an anti-wraparound vacuum needs to freeze tuples and observe the latest xid
* present in the table, even on pages that don't have any dead tuples.
+ * In 9.6 or later, the visibility map has an additional bit which indicates
+ * that all tuples on a single page have been completely frozen, so the
+ * visibility map is also used for anti-wraparound vacuums.
+ *
*
* LOCKING
*
@@ -58,14 +70,14 @@
* section that logs the page modification. However, we don't want to hold
* the buffer lock over any I/O that may be required to read in the visibility
* map page. To avoid this, we examine the heap page before locking it;
- * if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map
- * bit. Then, we lock the buffer. But this creates a race condition: there
- * is a possibility that in the time it takes to lock the buffer, the
- * PD_ALL_VISIBLE bit gets set. If that happens, we have to unlock the
- * buffer, pin the visibility map page, and relock the buffer. This shouldn't
- * happen often, because only VACUUM currently sets visibility map bits,
- * and the race will only occur if VACUUM processes a given page at almost
- * exactly the same time that someone tries to further modify it.
+ * if the page-level PD_ALL_VISIBLE or PD_ALL_FROZEN bit is set, we pin the
+ * visibility map bit. Then, we lock the buffer. But this creates a race
+ * condition: there is a possibility that in the time it takes to lock the
+ * buffer, the PD_ALL_VISIBLE or PD_ALL_FROZEN bit gets set. If that happens,
+ * we have to unlock the buffer, pin the visibility map page, and relock the
+ * buffer. This shouldn't happen often, because only VACUUM currently sets
+ * visibility map bits, and the race will only occur if VACUUM processes a given
+ * page at almost exactly the same time that someone tries to further modify it.
*
* To set a bit, you need to hold a lock on the heap page. That prevents
* the race condition where VACUUM sees that all tuples on the page are
@@ -101,11 +113,14 @@
*/
#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
-/* Number of bits allocated for each heap block. */
-#define BITS_PER_HEAPBLOCK 1
+/*
+ * Number of bits allocated for each heap block.
+ * One for all-visible, the other for all-frozen.
+*/
+#define BITS_PER_HEAPBLOCK 2
/* Number of heap blocks we can represent in one byte. */
-#define HEAPBLOCKS_PER_BYTE 8
+#define HEAPBLOCKS_PER_BYTE 4
/* Number of heap blocks we can represent in one visibility map page. */
#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
@@ -115,24 +130,42 @@
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
-/* table for fast counting of set bits */
-static const uint8 number_of_ones[256] = {
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+/* tables for fast counting of set bits for visible and freeze */
+static const uint8 number_of_ones_for_visible[256] = {
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
+};
+static const uint8 number_of_ones_for_frozen[256] = {
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
};
/* prototypes for internal routines */
@@ -141,7 +174,7 @@ static void vm_extend(Relation rel, BlockNumber nvmblocks);
/*
- * visibilitymap_clear - clear a bit in visibility map
+ * visibilitymap_clear - clear all bits in visibility map
*
* You must pass a buffer containing the correct map page to this function.
* Call visibilitymap_pin first to pin the right one. This function doesn't do
@@ -153,7 +186,8 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
int mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
- uint8 mask = 1 << mapBit;
+ uint8 mask = (VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN) <<
+ (BITS_PER_HEAPBLOCK * mapBit);
char *map;
#ifdef TRACE_VISIBILITYMAP
@@ -225,7 +259,7 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
}
/*
- * visibilitymap_set - set a bit on a previously pinned page
+ * visibilitymap_set - set bit(s) on a previously pinned page
*
* recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
* or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
@@ -234,10 +268,11 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
* marked all-visible; it is needed for Hot Standby, and can be
* InvalidTransactionId if the page contains no tuples.
*
- * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
- * this function. Except in recovery, caller should also pass the heap
- * buffer. When checksums are enabled and we're not in recovery, we must add
- * the heap buffer to the WAL chain to protect it from being torn.
+ * Caller is expected to set the heap page's PD_ALL_VISIBLE or PD_ALL_FROZEN
+ * bit before calling this function. Except in recovery, caller should also
+ * pass the heap buffer and the flags indicating which bit(s) we want to set.
+ * When checksums are enabled and we're not in recovery, we must add the heap
+ * buffer to the WAL chain to protect it from being torn.
*
* You must pass a buffer containing the correct map page to this function.
* Call visibilitymap_pin first to pin the right one. This function doesn't do
@@ -245,7 +280,8 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
*/
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
- XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid)
+ XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
+ uint8 flags)
{
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
@@ -254,7 +290,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
char *map;
#ifdef TRACE_VISIBILITYMAP
- elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
+ elog(DEBUG1, "vm_set %s %d %u", RelationGetRelationName(rel), heapBlk, flags);
#endif
Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
@@ -272,11 +308,11 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
map = PageGetContents(page);
LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);
- if (!(map[mapByte] & (1 << mapBit)))
+ if (flags != (map[mapByte] & (flags << (BITS_PER_HEAPBLOCK * mapBit))))
{
START_CRIT_SECTION();
- map[mapByte] |= (1 << mapBit);
+ map[mapByte] |= (flags << (BITS_PER_HEAPBLOCK * mapBit));
MarkBufferDirty(vmBuf);
if (RelationNeedsWAL(rel))
@@ -285,7 +321,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
{
Assert(!InRecovery);
recptr = log_heap_visible(rel->rd_node, heapBuf, vmBuf,
- cutoff_xid);
+ cutoff_xid, flags);
/*
* If data checksums are enabled (or wal_log_hints=on), we
@@ -295,11 +331,15 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
{
Page heapPage = BufferGetPage(heapBuf);
- /* caller is expected to set PD_ALL_VISIBLE first */
- Assert(PageIsAllVisible(heapPage));
+ /*
+ * caller is expected to set PD_ALL_VISIBLE or
+ * PD_ALL_FROZEN first.
+ */
+ Assert(PageIsAllVisible(heapPage) || PageIsAllFrozen(heapPage));
PageSetLSN(heapPage, recptr);
}
}
+
PageSetLSN(page, recptr);
}
@@ -310,15 +350,16 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
}
/*
- * visibilitymap_test - test if a bit is set
+ * visibilitymap_test - test if bit(s) is set
*
- * Are all tuples on heapBlk visible to all, according to the visibility map?
+ * Are all tuples on heapBlk visible or frozen to all, according to the visibility map?
*
* On entry, *buf should be InvalidBuffer or a valid buffer returned by an
* earlier call to visibilitymap_pin or visibilitymap_test on the same
* relation. On return, *buf is a valid buffer with the map page containing
* the bit for heapBlk, or InvalidBuffer. The caller is responsible for
- * releasing *buf after it's done testing and setting bits.
+ * releasing *buf after it's done testing and setting bits, and must pass the
+ * flags indicating which bit(s) we want to test.
*
* NOTE: This function is typically called without a lock on the heap page,
* so somebody else could change the bit just after we look at it. In fact,
@@ -328,7 +369,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
* all concurrency issues!
*/
bool
-visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
+visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf, uint8 flags)
{
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
@@ -337,7 +378,7 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
char *map;
#ifdef TRACE_VISIBILITYMAP
- elog(DEBUG1, "vm_test %s %d", RelationGetRelationName(rel), heapBlk);
+ elog(DEBUG1, "vm_test %s %d %u", RelationGetRelationName(rel), heapBlk, flags);
#endif
/* Reuse the old pinned buffer if possible */
@@ -360,11 +401,12 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
map = PageGetContents(BufferGetPage(*buf));
/*
- * A single-bit read is atomic. There could be memory-ordering effects
+ * A single or double bit read is atomic. There could be memory-ordering effects
* here, but for performance reasons we make it the caller's job to worry
* about that.
*/
- result = (map[mapByte] & (1 << mapBit)) ? true : false;
+ result = (map[mapByte] & (flags << (BITS_PER_HEAPBLOCK * mapBit))) ?
+ true : false;
return result;
}
@@ -374,10 +416,11 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
*
* Note: we ignore the possibility of race conditions when the table is being
* extended concurrently with the call. New pages added to the table aren't
- * going to be marked all-visible, so they won't affect the result.
+ * going to be marked all-visible or all-frozen, so they won't affect the result.
+ * The caller must pass the flags indicating which bit(s) we want to count.
*/
BlockNumber
-visibilitymap_count(Relation rel)
+visibilitymap_count(Relation rel, uint8 flags)
{
BlockNumber result = 0;
BlockNumber mapBlock;
@@ -406,7 +449,10 @@ visibilitymap_count(Relation rel)
for (i = 0; i < MAPSIZE; i++)
{
- result += number_of_ones[map[i]];
+ if (flags & VISIBILITYMAP_ALL_VISIBLE)
+ result += number_of_ones_for_visible[map[i]];
+ if (flags & VISIBILITYMAP_ALL_FROZEN)
+ result += number_of_ones_for_frozen[map[i]];
}
ReleaseBuffer(mapBuffer);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 69f35c9..87bf0c8 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1919,11 +1919,18 @@ index_update_stats(Relation rel,
{
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
BlockNumber relallvisible;
+ BlockNumber relallfrozen;
if (rd_rel->relkind != RELKIND_INDEX)
- relallvisible = visibilitymap_count(rel);
+ {
+ relallvisible = visibilitymap_count(rel, VISIBILITYMAP_ALL_VISIBLE);
+ relallfrozen = visibilitymap_count(rel, VISIBILITYMAP_ALL_FROZEN);
+ }
else /* don't bother for indexes */
+ {
relallvisible = 0;
+ relallfrozen = 0;
+ }
if (rd_rel->relpages != (int32) relpages)
{
@@ -1940,6 +1947,11 @@ index_update_stats(Relation rel,
rd_rel->relallvisible = (int32) relallvisible;
dirty = true;
}
+ if (rd_rel->relallfrozen != (int32) relallfrozen)
+ {
+ rd_rel->relallfrozen = (int32) relallfrozen;
+ dirty = true;
+ }
}
/*
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 861048f..392c2a4 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -572,7 +572,8 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
vac_update_relstats(onerel,
relpages,
totalrows,
- visibilitymap_count(onerel),
+ visibilitymap_count(onerel, VISIBILITYMAP_ALL_VISIBLE),
+ visibilitymap_count(onerel, VISIBILITYMAP_ALL_FROZEN),
hasindex,
InvalidTransactionId,
InvalidMultiXactId,
@@ -595,6 +596,7 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
RelationGetNumberOfBlocks(Irel[ind]),
totalindexrows,
0,
+ 0,
false,
InvalidTransactionId,
InvalidMultiXactId,
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 7ab4874..d3725dd 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -22,6 +22,7 @@
#include "access/rewriteheap.h"
#include "access/transam.h"
#include "access/tuptoaster.h"
+#include "access/visibilitymap.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index baf66f1..d68c7c4 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -744,6 +744,7 @@ void
vac_update_relstats(Relation relation,
BlockNumber num_pages, double num_tuples,
BlockNumber num_all_visible_pages,
+ BlockNumber num_all_frozen_pages,
bool hasindex, TransactionId frozenxid,
MultiXactId minmulti,
bool in_outer_xact)
@@ -781,6 +782,11 @@ vac_update_relstats(Relation relation,
pgcform->relallvisible = (int32) num_all_visible_pages;
dirty = true;
}
+ if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
+ {
+ pgcform->relallfrozen = (int32) num_all_frozen_pages;
+ dirty = true;
+ }
/* Apply DDL updates, but not inside an outer transaction (see above) */
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index a01cfb4..120de63 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -106,6 +106,8 @@ typedef struct LVRelStats
BlockNumber rel_pages; /* total number of pages */
BlockNumber scanned_pages; /* number of pages we examined */
BlockNumber pinskipped_pages; /* # of pages we skipped due to a pin */
+ BlockNumber vmskipped_frozen_pages; /* # of pages we skipped by all-frozen bit
+ of visibility map */
double scanned_tuples; /* counts only tuples on scanned pages */
double old_rel_tuples; /* previous value of pg_class.reltuples */
double new_rel_tuples; /* new estimated total # of tuples */
@@ -156,7 +158,7 @@ static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static int vac_cmp_itemptr(const void *left, const void *right);
static bool heap_page_is_all_visible(Relation rel, Buffer buf,
- TransactionId *visibility_cutoff_xid);
+ TransactionId *visibility_cutoff_xid, bool *all_frozen);
/*
@@ -188,7 +190,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
MultiXactId mxactFullScanLimit;
BlockNumber new_rel_pages;
double new_rel_tuples;
- BlockNumber new_rel_allvisible;
+ BlockNumber new_rel_allvisible,
+ new_rel_allfrozen;
double new_live_tuples;
TransactionId new_frozen_xid;
MultiXactId new_min_multi;
@@ -222,6 +225,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
* than or equal to the requested Xid full-table scan limit; or if the
* table's minimum MultiXactId is older than or equal to the requested
* mxid full-table scan limit.
+ * Even if scan_all is set, we may still be able to skip scanning some
+ * pages according to the all-frozen bit of the visibility map.
*/
scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
xidFullScanLimit);
@@ -253,7 +258,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
* NB: We need to check this before truncating the relation, because that
* will change ->rel_pages.
*/
- if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
+ if ((vacrelstats->scanned_pages + vacrelstats->vmskipped_frozen_pages)
+ < vacrelstats->rel_pages)
{
Assert(!scan_all);
scanned_all = false;
@@ -301,10 +307,14 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
new_rel_tuples = vacrelstats->old_rel_tuples;
}
- new_rel_allvisible = visibilitymap_count(onerel);
+ new_rel_allvisible = visibilitymap_count(onerel, VISIBILITYMAP_ALL_VISIBLE);
if (new_rel_allvisible > new_rel_pages)
new_rel_allvisible = new_rel_pages;
+ new_rel_allfrozen = visibilitymap_count(onerel, VISIBILITYMAP_ALL_FROZEN);
+ if (new_rel_allfrozen > new_rel_pages)
+ new_rel_allfrozen = new_rel_pages;
+
new_frozen_xid = scanned_all ? FreezeLimit : InvalidTransactionId;
new_min_multi = scanned_all ? MultiXactCutoff : InvalidMultiXactId;
@@ -312,6 +322,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
new_rel_pages,
new_rel_tuples,
new_rel_allvisible,
+ new_rel_allfrozen,
vacrelstats->hasindex,
new_frozen_xid,
new_min_multi,
@@ -360,10 +371,11 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
get_namespace_name(RelationGetNamespace(onerel)),
RelationGetRelationName(onerel),
vacrelstats->num_index_scans);
- appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins\n"),
+ appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped according to vm\n"),
vacrelstats->pages_removed,
vacrelstats->rel_pages,
- vacrelstats->pinskipped_pages);
+ vacrelstats->pinskipped_pages,
+ vacrelstats->vmskipped_frozen_pages);
appendStringInfo(&buf,
_("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable\n"),
vacrelstats->tuples_deleted,
@@ -486,9 +498,12 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* consecutive pages. Since we're reading sequentially, the OS should be
* doing readahead for us, so there's no gain in skipping a page now and
* then; that's likely to disable readahead and so be counterproductive.
- * Also, skipping even a single page means that we can't update
- * relfrozenxid, so we only want to do it if we can skip a goodly number
- * of pages.
+ * Also, skipping even a single page according to the all-visible bit of
+ * the visibility map means that we can't update relfrozenxid, so we only
+ * want to do it if we can skip a goodly number. On the other hand, we
+ * count both how many pages we skipped according to the all-frozen bit
+ * of the visibility map and how many pages we froze, so we can update
+ * relfrozenxid if the sum of them is as large as the number of pages.
*
* Before entering the main loop, establish the invariant that
* next_not_all_visible_block is the next block number >= blkno that's not
@@ -515,7 +530,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
next_not_all_visible_block < nblocks;
next_not_all_visible_block++)
{
- if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
+ if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer,
+ VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN))
break;
vacuum_delay_point();
}
@@ -533,7 +549,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
bool tupgone,
hastup;
int prev_dead_count;
- int nfrozen;
+ int nfrozen; /* # of tuples frozen by this vacuum */
+ int nalready_frozen; /* # of tuples already frozen */
+ int ntotal_frozen; /* total # of frozen tuples on this page */
+ int ntup_per_page;
Size freespace;
bool all_visible_according_to_vm;
bool all_visible;
@@ -548,7 +567,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
next_not_all_visible_block++)
{
if (!visibilitymap_test(onerel, next_not_all_visible_block,
- &vmbuffer))
+ &vmbuffer,
+ VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN))
break;
vacuum_delay_point();
}
@@ -566,9 +586,25 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
}
else
{
- /* Current block is all-visible */
- if (skipping_all_visible_blocks && !scan_all)
- continue;
+ /*
+ * This block is at least all-visible according to the visibility map.
+ * We check whether this block is all-frozen so that we can skip
+ * vacuuming this page even when scanning the whole relation is required.
+ */
+ if (scan_all)
+ {
+ if (visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_FROZEN))
+ {
+ vacrelstats->vmskipped_frozen_pages++;
+ continue;
+ }
+ }
+ else
+ {
+ if (skipping_all_visible_blocks)
+ continue;
+ }
+
all_visible_according_to_vm = true;
}
@@ -740,7 +776,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
PageSetAllVisible(page);
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
- vmbuffer, InvalidTransactionId);
+ vmbuffer, InvalidTransactionId,
+ VISIBILITYMAP_ALL_VISIBLE);
END_CRIT_SECTION();
}
@@ -764,6 +801,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
all_visible = true;
has_dead_tuples = false;
nfrozen = 0;
+ nalready_frozen = 0;
+ ntup_per_page = 0;
hastup = false;
prev_dead_count = vacrelstats->num_dead_tuples;
maxoff = PageGetMaxOffsetNumber(page);
@@ -918,8 +957,13 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
else
{
num_tuples += 1;
+ ntup_per_page += 1;
hastup = true;
+ /* Check whether this tuple is already frozen or not */
+ if (HeapTupleHeaderXminFrozen(tuple.t_data))
+ nalready_frozen += 1;
+
/*
* Each non-removable tuple must be checked to see if it needs
* freezing. Note we already have exclusive buffer lock.
@@ -931,9 +975,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
} /* scan along page */
/*
- * If we froze any tuples, mark the buffer dirty, and write a WAL
- * record recording the changes. We must log the changes to be
- * crash-safe against future truncation of CLOG.
+ * If we froze any tuples, mark the buffer dirty, and write a WAL
+ * record recording the changes. We must log the changes to be
+ * crash-safe against future truncation of CLOG. (NB: the guard below
+ * tests only newly-frozen tuples, not already-frozen ones.)
*/
if (nfrozen > 0)
{
@@ -966,6 +1011,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
END_CRIT_SECTION();
}
+ /* Compute the number of frozen tuples in a page */
+ ntotal_frozen = nfrozen + nalready_frozen;
+
/*
* If there are no indexes then we can vacuum the page right now
* instead of doing a second scan.
@@ -988,26 +1036,47 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
freespace = PageGetHeapFreeSpace(page);
- /* mark page all-visible, if appropriate */
- if (all_visible && !all_visible_according_to_vm)
+ /* This page is all visible */
+ if (all_visible)
{
- /*
- * It should never be the case that the visibility map page is set
- * while the page-level bit is clear, but the reverse is allowed
- * (if checksums are not enabled). Regardless, set the both bits
- * so that we get back in sync.
- *
- * NB: If the heap page is all-visible but the VM bit is not set,
- * we don't need to dirty the heap page. However, if checksums
- * are enabled, we do need to make sure that the heap page is
- * dirtied before passing it to visibilitymap_set(), because it
- * may be logged. Given that this situation should only happen in
- * rare cases after a crash, it is not worth optimizing.
- */
- PageSetAllVisible(page);
- MarkBufferDirty(buf);
- visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
- vmbuffer, visibility_cutoff_xid);
+ uint8 flags = 0;
+
+ /* mark page all-visible, if appropriate */
+ if (!all_visible_according_to_vm)
+ {
+ /*
+ * It should never be the case that the visibility map page is set
+ * while the page-level bit is clear, but the reverse is allowed
+ * (if checksums are not enabled). Regardless, set the both bits
+ * so that we get back in sync.
+ *
+ * NB: If the heap page is all-visible but the VM bit is not set,
+ * we don't need to dirty the heap page. However, if checksums
+ * are enabled, we do need to make sure that the heap page is
+ * dirtied before passing it to visibilitymap_set(), because it
+ * may be logged. Given that this situation should only happen in
+ * rare cases after a crash, it is not worth optimizing.
+ */
+ PageSetAllVisible(page);
+ flags |= VISIBILITYMAP_ALL_VISIBLE;
+ }
+
+ /* mark page all-frozen, if all tuples are frozen in total */
+ if ((ntotal_frozen == ntup_per_page) &&
+ !visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_FROZEN))
+ {
+ Assert(PageIsAllVisible(page));
+
+ PageSetAllFrozen(page);
+ flags |= VISIBILITYMAP_ALL_FROZEN;
+ }
+
+ if (flags)
+ {
+ MarkBufferDirty(buf);
+ visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
+ vmbuffer, visibility_cutoff_xid, flags);
+ }
}
/*
@@ -1018,7 +1087,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* that something bad has happened.
*/
else if (all_visible_according_to_vm && !PageIsAllVisible(page)
- && visibilitymap_test(onerel, blkno, &vmbuffer))
+ && visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
{
elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
relname, blkno);
@@ -1047,6 +1116,17 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
visibilitymap_clear(onerel, blkno, vmbuffer);
}
+ /*
+ * If every tuple on this page is frozen, set the all-frozen bit in
+ * both the page header and the visibility map.
+ */
+ if (ntotal_frozen == ntup_per_page)
+ {
+ PageSetAllFrozen(page);
+ visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, vmbuffer,
+ InvalidTransactionId, VISIBILITYMAP_ALL_FROZEN);
+ }
+
UnlockReleaseBuffer(buf);
/* Remember the location of the last page with nonremovable tuples */
@@ -1078,7 +1158,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
num_tuples);
/*
- * Release any remaining pin on visibility map page.
+ * Release any remaining pin on visibility map and frozen map page.
*/
if (BufferIsValid(vmbuffer))
{
@@ -1115,6 +1195,14 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
tups_vacuumed, vacuumed_pages)));
/*
+ * This information shows how effective the all-frozen bit of the
+ * visibility map was in allowing vacuum to skip frozen pages.
+ */
+ ereport(elevel,
+ (errmsg("skipped %u frozen pages according to visibility map",
+ vacrelstats->vmskipped_frozen_pages)));
+
+ /*
* This is pretty messy, but we split it up so that we can skip emitting
* individual parts of the message when not applicable.
*/
@@ -1226,6 +1314,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
OffsetNumber unused[MaxOffsetNumber];
int uncnt = 0;
TransactionId visibility_cutoff_xid;
+ bool all_frozen;
START_CRIT_SECTION();
@@ -1277,19 +1366,31 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
* dirty, exclusively locked, and, if needed, a full page image has been
* emitted in the log_heap_clean() above.
*/
- if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid))
+ if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid, &all_frozen))
PageSetAllVisible(page);
/*
* All the changes to the heap page have been done. If the all-visible
- * flag is now set, also set the VM bit.
+ * flag is now set, also set the VM all-visible bit.
+ * Also, if this page is all-frozen, set VM all-frozen bit and flag.
*/
- if (PageIsAllVisible(page) &&
- !visibilitymap_test(onerel, blkno, vmbuffer))
+ if (PageIsAllVisible(page))
{
- Assert(BufferIsValid(*vmbuffer));
- visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
- visibility_cutoff_xid);
+ uint8 flags = 0;
+
+ if (!visibilitymap_test(onerel, blkno, vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
+ flags |= VISIBILITYMAP_ALL_VISIBLE;
+
+ /* mark page all-frozen, and set VM all-frozen bit */
+ if (all_frozen)
+ {
+ PageSetAllFrozen(page);
+ flags |= VISIBILITYMAP_ALL_FROZEN;
+ }
+
+ if (flags)
+ visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
+ visibility_cutoff_xid, flags);
}
return tupindex;
@@ -1408,6 +1509,7 @@ lazy_cleanup_index(Relation indrel,
stats->num_pages,
stats->num_index_tuples,
0,
+ 0,
false,
InvalidTransactionId,
InvalidMultiXactId,
@@ -1782,7 +1884,8 @@ vac_cmp_itemptr(const void *left, const void *right)
* xmin amongst the visible tuples.
*/
static bool
-heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid)
+heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid,
+ bool *all_frozen)
{
Page page = BufferGetPage(buf);
BlockNumber blockno = BufferGetBlockNumber(buf);
@@ -1791,6 +1894,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
bool all_visible = true;
*visibility_cutoff_xid = InvalidTransactionId;
+ *all_frozen = true;
/*
* This is a stripped down version of the line pointer scan in
@@ -1814,7 +1918,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
/*
* Dead line pointers can have index pointers pointing to them. So
- * they can't be treated as visible
+ * they can't be treated as visible and frozen.
*/
if (ItemIdIsDead(itemid))
{
@@ -1855,6 +1959,10 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
/* Track newest xmin on page. */
if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
*visibility_cutoff_xid = xmin;
+
+ /* Check whether this tuple is alrady frozen or not */
+ if (!HeapTupleHeaderXminFrozen(tuple.t_data))
+ *all_frozen = false;
}
break;
@@ -1863,6 +1971,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
case HEAPTUPLE_INSERT_IN_PROGRESS:
case HEAPTUPLE_DELETE_IN_PROGRESS:
all_visible = false;
+ *all_frozen = false;
break;
default:
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 9f54c46..08df289 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -116,7 +116,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
*/
if (!visibilitymap_test(scandesc->heapRelation,
ItemPointerGetBlockNumber(tid),
- &node->ioss_VMBuffer))
+ &node->ioss_VMBuffer, VISIBILITYMAP_ALL_VISIBLE))
{
/*
* Rats, we have to visit the heap to check visibility.
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 1ef76d0..ee49ddf 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -127,7 +127,7 @@ ExecCheckPlanOutput(Relation resultRel, List *targetList)
if (attno != resultDesc->natts)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
- errmsg("table row type and query-specified row type do not match"),
+ errmsg("table row type and query-specified row type do not match"),
errdetail("Query has too few columns.")));
}
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 79d9390..8fededc 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -10,6 +10,7 @@
#include "postgres_fe.h"
#include "pg_upgrade.h"
+#include "storage/bufpage.h"
#include <fcntl.h>
@@ -21,6 +22,27 @@ static int copy_file(const char *fromfile, const char *tofile, bool force);
static int win32_pghardlink(const char *src, const char *dst);
#endif
+static int rewrite_vm_to_vfm(const char *fromfile, const char *tofile, bool force);
+
+/* table for fast rewriting vm file to vfm file */
+static const uint16 rewrite_vm_to_vfm_table[256] = {
+ 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85,
+ 256, 257, 260, 261, 272, 273, 276, 277, 320, 321, 324, 325, 336, 337, 340, 341,
+ 1024, 1025, 1028, 1029, 1040, 1041, 1044, 1045, 1088, 1089, 1092, 1093, 1104, 1105, 1108, 1109,
+ 1280, 1281, 1284, 1285, 1296, 1297, 1300, 1301, 1344, 1345, 1348, 1349, 1360, 1361, 1364, 1365,
+ 4096, 4097, 4100, 4101, 4112, 4113, 4116, 4117, 4160, 4161, 4164, 4165, 4176, 4177, 4180, 4181,
+ 4352, 4353, 4356, 4357, 4368, 4369, 4372, 4373, 4416, 4417, 4420, 4421, 4432, 4433, 4436, 4437,
+ 5120, 5121, 5124, 5125, 5136, 5137, 5140, 5141, 5184, 5185, 5188, 5189, 5200, 5201, 5204, 5205,
+ 5376, 5377, 5380, 5381, 5392, 5393, 5396, 5397, 5440, 5441, 5444, 5445, 5456, 5457, 5460, 5461,
+ 16384, 16385, 16388, 16389, 16400, 16401, 16404, 16405, 16448, 16449, 16452, 16453, 16464, 16465, 16468, 16469,
+ 16640, 16641, 16644, 16645, 16656, 16657, 16660, 16661, 16704, 16705, 16708, 16709, 16720, 16721, 16724, 16725,
+ 17408, 17409, 17412, 17413, 17424, 17425, 17428, 17429, 17472, 17473, 17476, 17477, 17488, 17489, 17492, 17493,
+ 17664, 17665, 17668, 17669, 17680, 17681, 17684, 17685, 17728, 17729, 17732, 17733, 17744, 17745, 17748, 17749,
+ 20480, 20481, 20484, 20485, 20496, 20497, 20500, 20501, 20544, 20545, 20548, 20549, 20560, 20561, 20564, 20565,
+ 20736, 20737, 20740, 20741, 20752, 20753, 20756, 20757, 20800, 20801, 20804, 20805, 20816, 20817, 20820, 20821,
+ 21504, 21505, 21508, 21509, 21520, 21521, 21524, 21525, 21568, 21569, 21572, 21573, 21584, 21585, 21588, 21589,
+ 21760, 21761, 21764, 21765, 21776, 21777, 21780, 21781, 21824, 21825, 21828, 21829, 21840, 21841, 21844, 21845
+};
/*
* copyAndUpdateFile()
@@ -30,11 +52,19 @@ static int win32_pghardlink(const char *src, const char *dst);
*/
const char *
copyAndUpdateFile(pageCnvCtx *pageConverter,
- const char *src, const char *dst, bool force)
+ const char *src, const char *dst, bool force, bool rewrite_vm)
{
+
if (pageConverter == NULL)
{
- if (pg_copy_file(src, dst, force) == -1)
+ int ret;
+
+ if (rewrite_vm)
+ ret = rewrite_vm_to_vfm(src, dst, force);
+ else
+ ret = pg_copy_file(src, dst, force);
+
+ if (ret)
return getErrorText(errno);
else
return NULL;
@@ -99,7 +129,6 @@ copyAndUpdateFile(pageCnvCtx *pageConverter,
}
}
-
/*
* linkAndUpdateFile()
*
@@ -201,6 +230,110 @@ copy_file(const char *srcfile, const char *dstfile, bool force)
#endif
+/*
+ * rewrite_vm_to_vfm()
+ *
+ * An additional bit, indicating that all tuples on the page are completely
+ * frozen, was added to the visibility map in PG 9.6, so the format of the
+ * visibility map has changed. Copies a visibility map file while
+ * interleaving an all-frozen bit (initially 0) after each all-visible bit.
+ */
+static int
+rewrite_vm_to_vfm(const char *fromfile, const char *tofile, bool force)
+{
+#define REWRITE_BUF_SIZE (50 * BLCKSZ)
+#define BITS_PER_HEAPBLOCK 2
+
+ int src_fd, dst_fd;
+ uint16 vfm_bits;
+ ssize_t nbytes;
+ char *buffer;
+ int ret = 0;
+ int save_errno = 0;
+
+ if ((fromfile == NULL) || (tofile == NULL))
+ {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if ((src_fd = open(fromfile, O_RDONLY, 0)) < 0)
+ return -1;
+
+ if ((dst_fd = open(tofile, O_RDWR | O_CREAT | (force ? 0 : O_EXCL), S_IRUSR | S_IWUSR)) < 0)
+ {
+ save_errno = errno;
+ if (src_fd != 0)
+ close(src_fd);
+
+ errno = save_errno;
+ return -1;
+ }
+
+ buffer = (char *) pg_malloc(REWRITE_BUF_SIZE);
+
+ /* Copy page header data in advance */
+ if ((nbytes = read(src_fd, buffer, MAXALIGN(SizeOfPageHeaderData))) <= 0)
+ {
+ save_errno = errno;
+ return -1;
+ }
+
+ if (write(dst_fd, buffer, nbytes) != nbytes)
+ {
+ /* if write didn't set errno, assume problem is no disk space */
+ if (errno == 0)
+ errno = ENOSPC;
+ save_errno = errno;
+ return -1;
+ }
+
+ /* Perform the data rewriting, i.e. read from source, write to destination */
+ while (true)
+ {
+ ssize_t nbytes = read(src_fd, buffer, REWRITE_BUF_SIZE);
+ char *cur, *end;
+
+ if (nbytes < 0)
+ {
+ ret = -1;
+ break;
+ }
+
+ if (nbytes == 0)
+ break;
+
+ cur = buffer;
+ end = buffer + nbytes;
+
+ /*
+ * Rewrite each source byte and write BITS_PER_HEAPBLOCK bytes to dst_fd.
+ */
+ while (end > cur)
+ {
+ /* Look up the expanded (vm -> vfm) bits in the rewrite table */
+ vfm_bits = rewrite_vm_to_vfm_table[(uint8) *cur];
+
+ if (write(dst_fd, &vfm_bits, BITS_PER_HEAPBLOCK) != BITS_PER_HEAPBLOCK)
+ {
+ ret = -1;
+ break;
+ }
+ cur++;
+ }
+ }
+
+ pg_free(buffer);
+
+ if (src_fd != 0)
+ close(src_fd);
+
+ if (dst_fd != 0)
+ close(dst_fd);
+
+ return ret;
+}
+
void
check_hard_link(void)
{
diff --git a/src/bin/pg_upgrade/pg_upgrade.h b/src/bin/pg_upgrade/pg_upgrade.h
index 13aa891..d957581 100644
--- a/src/bin/pg_upgrade/pg_upgrade.h
+++ b/src/bin/pg_upgrade/pg_upgrade.h
@@ -112,6 +112,11 @@ extern char *output_files[];
#define VISIBILITY_MAP_CRASHSAFE_CAT_VER 201107031
/*
+ * The format of visibility map changed with this 9.6 commit,
+ *
+ */
+#define VISIBILITY_MAP_FROZEN_BIT_CAT_VER 201507161
+/*
* pg_multixact format changed in 9.3 commit 0ac5ad5134f2769ccbaefec73844f85,
* ("Improve concurrency of foreign key locking") which also updated catalog
* version to this value. pg_upgrade behavior depends on whether old and new
@@ -397,7 +402,7 @@ typedef void *pageCnvCtx;
#endif
const char *copyAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
- const char *dst, bool force);
+ const char *dst, bool force, bool rewrite_vm);
const char *linkAndUpdateFile(pageCnvCtx *pageConverter, const char *src,
const char *dst);
diff --git a/src/bin/pg_upgrade/relfilenode.c b/src/bin/pg_upgrade/relfilenode.c
index c22df42..766a473 100644
--- a/src/bin/pg_upgrade/relfilenode.c
+++ b/src/bin/pg_upgrade/relfilenode.c
@@ -18,7 +18,7 @@
static void transfer_single_new_db(pageCnvCtx *pageConverter,
FileNameMap *maps, int size, char *old_tablespace);
static void transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
- const char *suffix);
+ const char *type_old_suffix, const char *type_new_suffix);
/*
@@ -171,6 +171,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
{
int mapnum;
bool vm_crashsafe_match = true;
+ bool vm_rewrite_needed = false;
/*
* Do the old and new cluster disagree on the crash-safetiness of the vm
@@ -180,13 +181,20 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
new_cluster.controldata.cat_ver >= VISIBILITY_MAP_CRASHSAFE_CAT_VER)
vm_crashsafe_match = false;
+ /*
+ * Do we need to rewrite the "vm" file into the new "vfm" format?
+ */
+ if (old_cluster.controldata.cat_ver < VISIBILITY_MAP_FROZEN_BIT_CAT_VER &&
+ new_cluster.controldata.cat_ver >= VISIBILITY_MAP_FROZEN_BIT_CAT_VER)
+ vm_rewrite_needed = true;
+
for (mapnum = 0; mapnum < size; mapnum++)
{
if (old_tablespace == NULL ||
strcmp(maps[mapnum].old_tablespace, old_tablespace) == 0)
{
/* transfer primary file */
- transfer_relfile(pageConverter, &maps[mapnum], "");
+ transfer_relfile(pageConverter, &maps[mapnum], "", "");
/* fsm/vm files added in PG 8.4 */
if (GET_MAJOR_VERSION(old_cluster.major_version) >= 804)
@@ -194,9 +202,17 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
/*
* Copy/link any fsm and vm files, if they exist
*/
- transfer_relfile(pageConverter, &maps[mapnum], "_fsm");
+ transfer_relfile(pageConverter, &maps[mapnum], "_fsm", "_fsm");
if (vm_crashsafe_match)
- transfer_relfile(pageConverter, &maps[mapnum], "_vm");
+ {
+ /*
+ * vm file is changed to vfm file in PG 9.6.
+ */
+ if (vm_rewrite_needed)
+ transfer_relfile(pageConverter, &maps[mapnum], "_vm", "_vfm");
+ else
+ transfer_relfile(pageConverter, &maps[mapnum], "_vm", "_vm");
+ }
}
}
}
@@ -210,7 +226,7 @@ transfer_single_new_db(pageCnvCtx *pageConverter,
*/
static void
transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
- const char *type_suffix)
+ const char *type_old_suffix, const char *type_new_suffix)
{
const char *msg;
char old_file[MAXPGPATH];
@@ -218,6 +234,7 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
int fd;
int segno;
char extent_suffix[65];
+ bool rewrite_vm = false;
/*
* Now copy/link any related segments as well. Remember, PG breaks large
@@ -236,18 +253,18 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
map->old_tablespace_suffix,
map->old_db_oid,
map->old_relfilenode,
- type_suffix,
+ type_old_suffix,
extent_suffix);
snprintf(new_file, sizeof(new_file), "%s%s/%u/%u%s%s",
map->new_tablespace,
map->new_tablespace_suffix,
map->new_db_oid,
map->new_relfilenode,
- type_suffix,
+ type_new_suffix,
extent_suffix);
/* Is it an extent, fsm, or vm file? */
- if (type_suffix[0] != '\0' || segno != 0)
+ if (type_old_suffix[0] != '\0' || segno != 0)
{
/* Did file open fail? */
if ((fd = open(old_file, O_RDONLY, 0)) == -1)
@@ -276,7 +293,11 @@ transfer_relfile(pageCnvCtx *pageConverter, FileNameMap *map,
{
pg_log(PG_VERBOSE, "copying \"%s\" to \"%s\"\n", old_file, new_file);
- if ((msg = copyAndUpdateFile(pageConverter, old_file, new_file, true)) != NULL)
+ /* We need to rewrite vm file to vfm file. */
+ if (strcmp(type_old_suffix, type_new_suffix) != 0)
+ rewrite_vm = true;
+
+ if ((msg = copyAndUpdateFile(pageConverter, old_file, new_file, true, rewrite_vm)) != NULL)
pg_fatal("error while copying relation \"%s.%s\" (\"%s\" to \"%s\"): %s\n",
map->nspname, map->relname, old_file, new_file, msg);
}
diff --git a/src/bin/pg_upgrade/test.sh b/src/bin/pg_upgrade/test.sh
index f4e5d9a..53b8b2f 100644
--- a/src/bin/pg_upgrade/test.sh
+++ b/src/bin/pg_upgrade/test.sh
@@ -171,6 +171,11 @@ if "$MAKE" -C "$oldsrc" installcheck; then
mv "$temp_root"/dump1.sql "$temp_root"/dump1.sql.orig
sed "s;$oldsrc;$newsrc;g" "$temp_root"/dump1.sql.orig >"$temp_root"/dump1.sql
fi
+
+ vm_sql="SELECT c.relname, c.relallvisible FROM pg_class as c, pg_namespace as n WHERE c.relnamespace = n.oid AND n.nspname NOT IN ('information_schema', 'pg_toast', 'pg_catalog') ORDER BY c.relname;"
+ # Test for rewriting visibility map
+ vacuumdb -d regression || visibilitymap_vacuum1_status=$?
+ psql -d regression -c "$vm_sql" > "$temp_root"/vm_test1.txt || visibilitymap_test1_status=$?
else
make_installcheck_status=$?
fi
@@ -185,6 +190,14 @@ if [ -n "$pg_dumpall1_status" ]; then
echo "pg_dumpall of pre-upgrade database cluster failed"
exit 1
fi
+if [ -n "$visibilitymap_vacuum1_status" ];then
+ echo "VACUUM of pre-upgrade database cluster for visibility map test failed"
+ exit 1
+fi
+if [ -n "$visibilitymap_test1_status" ];then
+ echo "SELECT of pre-upgrade database cluster for visibility map test failed"
+ exit 1
+fi
PGDATA=$BASE_PGDATA
@@ -200,6 +213,8 @@ case $testhost in
esac
pg_dumpall -f "$temp_root"/dump2.sql || pg_dumpall2_status=$?
+vacuumdb -d regression || visibilitymap_vacuum2_status=$?
+psql -d regression -c "$vm_sql" > "$temp_root"/vm_test2.txt || visibilitymap_test2_status=$?
pg_ctl -m fast stop
# no need to echo commands anymore
@@ -211,11 +226,26 @@ if [ -n "$pg_dumpall2_status" ]; then
exit 1
fi
+if [ -n "$visibilitymap_vacuum2_status" ];then
+ echo "VACUUM of post-upgrade database cluster for visibility map test failed"
+ exit 1
+fi
+
+if [ -n "$visibilitymap_test2_status" ];then
+ echo "SELECT of post-upgrade database cluster for visibility map test failed"
+ exit 1
+fi
+
case $testhost in
MINGW*) cmd /c delete_old_cluster.bat ;;
*) sh ./delete_old_cluster.sh ;;
esac
+if ! diff "$temp_root"/vm_test1.txt "$temp_root"/vm_test2.txt >/dev/null; then
+ echo "Visibility map rewriting test failed"
+ exit 1
+fi
+
if diff "$temp_root"/dump1.sql "$temp_root"/dump2.sql >/dev/null; then
echo PASSED
exit 0
diff --git a/src/common/relpath.c b/src/common/relpath.c
index 66dfef1..5898f1b 100644
--- a/src/common/relpath.c
+++ b/src/common/relpath.c
@@ -30,11 +30,14 @@
* If you add a new entry, remember to update the errhint in
* forkname_to_number() below, and update the SGML documentation for
* pg_relation_size().
+ * In 9.6 or later, the visibility map fork name is changed from "vm" to
+ * "vfm" because the visibility map contains not only all-visible
+ * information but also all-frozen information.
*/
const char *const forkNames[] = {
"main", /* MAIN_FORKNUM */
"fsm", /* FSM_FORKNUM */
- "vm", /* VISIBILITYMAP_FORKNUM */
+ "vfm", /* VISIBILITYMAP_FORKNUM */
"init" /* INIT_FORKNUM */
};
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index caa0f14..93afb10 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -320,9 +320,10 @@ typedef struct xl_heap_freeze_page
typedef struct xl_heap_visible
{
TransactionId cutoff_xid;
+ uint8 flags;
} xl_heap_visible;
-#define SizeOfHeapVisible (offsetof(xl_heap_visible, cutoff_xid) + sizeof(TransactionId))
+#define SizeOfHeapVisible (offsetof(xl_heap_visible, flags) + sizeof(uint8))
typedef struct xl_heap_new_cid
{
@@ -389,6 +390,6 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
xl_heap_freeze_tuple *xlrec_tp);
extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
- Buffer vm_buffer, TransactionId cutoff_xid);
+ Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags);
#endif /* HEAPAM_XLOG_H */
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index 0c0e0ef..7270609 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -19,15 +19,20 @@
#include "storage/buf.h"
#include "utils/relcache.h"
-extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk,
- Buffer vmbuf);
+/* Flags for bit map */
+#define VISIBILITYMAP_ALL_VISIBLE 0x01
+#define VISIBILITYMAP_ALL_FROZEN 0x02
+
+extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf);
extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
Buffer *vmbuf);
extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
- XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid);
-extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
-extern BlockNumber visibilitymap_count(Relation rel);
+ XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
+ uint8 flags);
+extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf,
+ uint8 flags);
+extern BlockNumber visibilitymap_count(Relation rel, uint8 flags);
extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks);
#endif /* VISIBILITYMAP_H */
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index e526cd9..ea0f7c1 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -47,6 +47,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
float4 reltuples; /* # of tuples (not always up-to-date) */
int32 relallvisible; /* # of all-visible blocks (not always
* up-to-date) */
+ int32 relallfrozen; /* # of all-frozen blocks (not always
+ * up-to-date) */
Oid reltoastrelid; /* OID of toast table; 0 if none */
bool relhasindex; /* T if has (or has had) any indexes */
bool relisshared; /* T if shared across databases */
@@ -95,7 +97,7 @@ typedef FormData_pg_class *Form_pg_class;
* ----------------
*/
-#define Natts_pg_class 30
+#define Natts_pg_class 31
#define Anum_pg_class_relname 1
#define Anum_pg_class_relnamespace 2
#define Anum_pg_class_reltype 3
@@ -107,25 +109,26 @@ typedef FormData_pg_class *Form_pg_class;
#define Anum_pg_class_relpages 9
#define Anum_pg_class_reltuples 10
#define Anum_pg_class_relallvisible 11
-#define Anum_pg_class_reltoastrelid 12
-#define Anum_pg_class_relhasindex 13
-#define Anum_pg_class_relisshared 14
-#define Anum_pg_class_relpersistence 15
-#define Anum_pg_class_relkind 16
-#define Anum_pg_class_relnatts 17
-#define Anum_pg_class_relchecks 18
-#define Anum_pg_class_relhasoids 19
-#define Anum_pg_class_relhaspkey 20
-#define Anum_pg_class_relhasrules 21
-#define Anum_pg_class_relhastriggers 22
-#define Anum_pg_class_relhassubclass 23
-#define Anum_pg_class_relrowsecurity 24
-#define Anum_pg_class_relispopulated 25
-#define Anum_pg_class_relreplident 26
-#define Anum_pg_class_relfrozenxid 27
-#define Anum_pg_class_relminmxid 28
-#define Anum_pg_class_relacl 29
-#define Anum_pg_class_reloptions 30
+#define Anum_pg_class_relallfrozen 12
+#define Anum_pg_class_reltoastrelid 13
+#define Anum_pg_class_relhasindex 14
+#define Anum_pg_class_relisshared 15
+#define Anum_pg_class_relpersistence 16
+#define Anum_pg_class_relkind 17
+#define Anum_pg_class_relnatts 18
+#define Anum_pg_class_relchecks 19
+#define Anum_pg_class_relhasoids 20
+#define Anum_pg_class_relhaspkey 21
+#define Anum_pg_class_relhasrules 22
+#define Anum_pg_class_relhastriggers 23
+#define Anum_pg_class_relhassubclass 24
+#define Anum_pg_class_relrowsecurity 25
+#define Anum_pg_class_relispopulated 26
+#define Anum_pg_class_relreplident 27
+#define Anum_pg_class_relfrozenxid 28
+#define Anum_pg_class_relminmxid 29
+#define Anum_pg_class_relacl 30
+#define Anum_pg_class_reloptions 31
/* ----------------
* initial contents of pg_class
@@ -140,13 +143,13 @@ typedef FormData_pg_class *Form_pg_class;
* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId;
* similarly, "1" in relminmxid stands for FirstMultiXactId
*/
-DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 28 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 31 0 t f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 09bf143..dbe16f3 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3213,6 +3213,12 @@ DESCR("sleep until the specified time");
DATA(insert OID = 2971 ( text PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "16" _null_ _null_ _null_ _null_ _null_ booltext _null_ _null_ _null_ ));
DESCR("convert boolean to text");
+DATA(insert OID = 3298 ( pg_is_all_visible PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 16 "2205 20" _null_ _null_ _null_ _null_ _null_ pg_is_all_visible _null_ _null_ _null_ ));
+DESCR("true if the page is all visible");
+DATA(insert OID = 3299 ( pg_is_all_frozen PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 16 "2205 20" _null_ _null_ _null_ _null_ _null_ pg_is_all_frozen _null_ _null_ _null_ ));
+DESCR("true if the page is all frozen");
+
+
/* Aggregates (moved here from pg_aggregate for 7.3) */
DATA(insert OID = 2100 ( avg PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 1700 "20" _null_ _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index e3a31af..d2bae2d 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -172,6 +172,7 @@ extern void vac_update_relstats(Relation relation,
BlockNumber num_pages,
double num_tuples,
BlockNumber num_all_visible_pages,
+ BlockNumber num_all_frozen_pages,
bool hasindex,
TransactionId frozenxid,
MultiXactId minmulti,
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index a2f78ee..7bf2718 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -178,8 +178,10 @@ typedef PageHeaderData *PageHeader;
* tuple? */
#define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to
* everyone */
+#define PD_ALL_FROZEN 0x0008 /* all tuples on page are completely
+ * frozen */
-#define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */
+#define PD_VALID_FLAG_BITS 0x000F /* OR of all valid pd_flags bits */
/*
* Page layout version number 0 is for pre-7.3 Postgres releases.
@@ -369,6 +371,13 @@ typedef PageHeaderData *PageHeader;
#define PageClearAllVisible(page) \
(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
+#define PageIsAllFrozen(page) \
+ (((PageHeader) (page))->pd_flags & PD_ALL_FROZEN)
+#define PageSetAllFrozen(page) \
+ (((PageHeader) (page))->pd_flags |= PD_ALL_FROZEN)
+#define PageClearAllFrozen(page) \
+ (((PageHeader) (page))->pd_flags &= ~PD_ALL_FROZEN)
+
#define PageIsPrunable(page, oldestxmin) \
( \
AssertMacro(TransactionIdIsNormal(oldestxmin)), \
diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule
index 4df15de..893d773 100644
--- a/src/test/regress/parallel_schedule
+++ b/src/test/regress/parallel_schedule
@@ -108,5 +108,8 @@ test: plancache limit plpgsql copy2 temp domain rangefuncs prepare without_oid c
# event triggers cannot run concurrently with any test that runs DDL
test: event_trigger
+# The visibility map and vacuum tests cannot run concurrently with any other test that runs SQL
+test: visibilitymap
+
# run stats by itself because its delay may be insufficient under heavy load
test: stats
diff --git a/src/test/regress/serial_schedule b/src/test/regress/serial_schedule
index 15d74d4..da84aa6 100644
--- a/src/test/regress/serial_schedule
+++ b/src/test/regress/serial_schedule
@@ -157,3 +157,4 @@ test: with
test: xml
test: event_trigger
test: stats
+test: visibilitymap
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers