On Fri, Jul 10, 2015 at 3:05 AM, Sawada Masahiko <[email protected]> wrote:
>
> Also something for pg_upgrade is also not yet.
>
> TODO
> - Test case for this feature
> - pg_upgrade support.
>
I had forgotten to change the fork name of visibility map to "vfm".
Attached latest v7 patch.
Please review it.
Regards,
--
Sawada Masahiko
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
index 22c5f7a..b1b6a06 100644
--- a/contrib/pgstattuple/pgstatapprox.c
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -87,7 +87,7 @@ statapprox_heap(Relation rel, output_type *stat)
* If the page has only visible tuples, then we can find out the free
* space from the FSM and move on.
*/
- if (visibilitymap_test(rel, blkno, &vmbuffer))
+ if (visibilitymap_test(rel, blkno, &vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
{
freespace = GetRecordedFreeSpace(rel, blkno);
stat->tuple_len += BLCKSZ - freespace;
diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile
index b83d496..806ce27 100644
--- a/src/backend/access/heap/Makefile
+++ b/src/backend/access/heap/Makefile
@@ -12,6 +12,7 @@ subdir = src/backend/access/heap
top_builddir = ../../../..
include $(top_builddir)/src/Makefile.global
-OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o
+OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o \
+ heapfuncs.o
include $(top_srcdir)/src/backend/common.mk
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index 86a2e6b..ac74100 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -88,7 +88,7 @@ static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup,
static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf,
Buffer newbuf, HeapTuple oldtup,
HeapTuple newtup, HeapTuple old_key_tup,
- bool all_visible_cleared, bool new_all_visible_cleared);
+ bool all_visible_cleared, bool new_all_visible_cleared);
static void HeapSatisfiesHOTandKeyUpdate(Relation relation,
Bitmapset *hot_attrs,
Bitmapset *key_attrs, Bitmapset *id_attrs,
@@ -2131,8 +2131,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
CheckForSerializableConflictIn(relation, NULL, InvalidBuffer);
/*
- * Find buffer to insert this tuple into. If the page is all visible,
- * this will also pin the requisite visibility map page.
+ * Find buffer to insert this tuple into. If the page is all visible
+ * or all frozen, this will also pin the requisite visibility map and
+ * frozen map page.
*/
buffer = RelationGetBufferForTuple(relation, heaptup->t_len,
InvalidBuffer, options, bistate,
@@ -2147,10 +2148,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid,
if (PageIsAllVisible(BufferGetPage(buffer)))
{
all_visible_cleared = true;
+
+ /* all-frozen information is also cleared at the same time */
PageClearAllVisible(BufferGetPage(buffer));
+ PageClearAllFrozen(BufferGetPage(buffer));
+
visibilitymap_clear(relation,
- ItemPointerGetBlockNumber(&(heaptup->t_self)),
- vmbuffer);
+ ItemPointerGetBlockNumber(&(heaptup->t_self)), vmbuffer);
}
/*
@@ -2448,10 +2452,12 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
if (PageIsAllVisible(page))
{
all_visible_cleared = true;
+
+ /* all-frozen information is also cleared at the same time */
PageClearAllVisible(page);
- visibilitymap_clear(relation,
- BufferGetBlockNumber(buffer),
- vmbuffer);
+ PageClearAllFrozen(page);
+
+ visibilitymap_clear(relation, BufferGetBlockNumber(buffer), vmbuffer);
}
/*
@@ -2495,7 +2501,9 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples,
/* the rest of the scratch space is used for tuple data */
tupledata = scratchptr;
- xlrec->flags = all_visible_cleared ? XLH_INSERT_ALL_VISIBLE_CLEARED : 0;
+ if (all_visible_cleared)
+ xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED;
+
xlrec->ntuples = nthispage;
/*
@@ -2731,9 +2739,9 @@ heap_delete(Relation relation, ItemPointer tid,
/*
* If we didn't pin the visibility map page and the page has become all
- * visible while we were busy locking the buffer, we'll have to unlock and
- * re-lock, to avoid holding the buffer lock across an I/O. That's a bit
- * unfortunate, but hopefully shouldn't happen often.
+ * visible or all frozen while we were busy locking the buffer, we'll
+ * have to unlock and re-lock, to avoid holding the buffer lock across an
+ * I/O. That's a bit unfortunate, but hopefully shouldn't happen often.
*/
if (vmbuffer == InvalidBuffer && PageIsAllVisible(page))
{
@@ -2925,12 +2933,16 @@ l1:
*/
PageSetPrunable(page, xid);
+ /* clear PD_ALL_VISIBLE and PD_ALL_FORZEN flags */
if (PageIsAllVisible(page))
{
all_visible_cleared = true;
+
+ /* all-frozen information is also cleared at the same time */
PageClearAllVisible(page);
- visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
- vmbuffer);
+ PageClearAllFrozen(page);
+
+ visibilitymap_clear(relation, BufferGetBlockNumber(buffer), vmbuffer);
}
/* store transaction information of xact deleting the tuple */
@@ -2961,7 +2973,9 @@ l1:
if (RelationIsAccessibleInLogicalDecoding(relation))
log_heap_new_cid(relation, &tp);
- xlrec.flags = all_visible_cleared ? XLH_DELETE_ALL_VISIBLE_CLEARED : 0;
+ if (all_visible_cleared)
+ xlrec.flags = XLH_DELETE_ALL_VISIBLE_CLEARED;
+
xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask,
tp.t_data->t_infomask2);
xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self);
@@ -3207,7 +3221,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
* in the middle of changing this, so we'll need to recheck after we have
* the lock.
*/
- if (PageIsAllVisible(page))
+ if (PageIsAllVisible(page) || PageIsAllFrozen(page))
visibilitymap_pin(relation, block, &vmbuffer);
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
@@ -3801,16 +3815,22 @@ l2:
if (PageIsAllVisible(BufferGetPage(buffer)))
{
all_visible_cleared = true;
+
+ /* all-frozen information is also cleared at the same time */
PageClearAllVisible(BufferGetPage(buffer));
- visibilitymap_clear(relation, BufferGetBlockNumber(buffer),
- vmbuffer);
+ PageClearAllFrozen(BufferGetPage(buffer));
+
+ visibilitymap_clear(relation, BufferGetBlockNumber(buffer), vmbuffer);
}
if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf)))
{
all_visible_cleared_new = true;
+
+ /* all-frozen information is also cleared at the same time */
PageClearAllVisible(BufferGetPage(newbuf));
- visibilitymap_clear(relation, BufferGetBlockNumber(newbuf),
- vmbuffer_new);
+ PageClearAllFrozen(BufferGetPage(newbuf));
+
+ visibilitymap_clear(relation, BufferGetBlockNumber(newbuf), vmbuffer_new);
}
if (newbuf != buffer)
@@ -6893,7 +6913,7 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid,
*/
XLogRecPtr
log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
- TransactionId cutoff_xid)
+ TransactionId cutoff_xid, uint8 vmflags)
{
xl_heap_visible xlrec;
XLogRecPtr recptr;
@@ -6903,6 +6923,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer,
Assert(BufferIsValid(vm_buffer));
xlrec.cutoff_xid = cutoff_xid;
+ xlrec.flags = vmflags;
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, SizeOfHeapVisible);
@@ -7492,8 +7513,14 @@ heap_xlog_visible(XLogReaderState *record)
* the subsequent update won't be replayed to clear the flag.
*/
page = BufferGetPage(buffer);
- PageSetAllVisible(page);
+
+ if (xlrec->flags & VISIBILITYMAP_ALL_VISIBLE)
+ PageSetAllVisible(page);
+ if (xlrec->flags & VISIBILITYMAP_ALL_FROZEN)
+ PageSetAllFrozen(page);
+
MarkBufferDirty(buffer);
+
}
else if (action == BLK_RESTORED)
{
@@ -7544,7 +7571,7 @@ heap_xlog_visible(XLogReaderState *record)
*/
if (lsn > PageGetLSN(vmpage))
visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer,
- xlrec->cutoff_xid);
+ xlrec->cutoff_xid, xlrec->flags);
ReleaseBuffer(vmbuffer);
FreeFakeRelcacheEntry(reln);
@@ -7694,7 +7721,10 @@ heap_xlog_delete(XLogReaderState *record)
PageSetPrunable(page, XLogRecGetXid(record));
if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED)
+ {
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+ }
/* Make sure there is no forward chain link in t_ctid */
htup->t_ctid = target_tid;
@@ -7798,7 +7828,10 @@ heap_xlog_insert(XLogReaderState *record)
PageSetLSN(page, lsn);
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
+ {
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+ }
MarkBufferDirty(buffer);
}
@@ -7937,7 +7970,10 @@ heap_xlog_multi_insert(XLogReaderState *record)
PageSetLSN(page, lsn);
if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED)
+ {
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+ }
MarkBufferDirty(buffer);
}
@@ -8065,7 +8101,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
PageSetPrunable(page, XLogRecGetXid(record));
if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED)
+ {
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+ }
PageSetLSN(page, lsn);
MarkBufferDirty(obuffer);
@@ -8200,7 +8239,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update)
elog(PANIC, "heap_update_redo: failed to add tuple");
if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED)
+ {
PageClearAllVisible(page);
+ PageClearAllFrozen(page);
+ }
freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
index 7c38772..13ad5b1 100644
--- a/src/backend/access/heap/visibilitymap.c
+++ b/src/backend/access/heap/visibilitymap.c
@@ -21,33 +21,44 @@
*
* NOTES
*
- * The visibility map is a bitmap with one bit per heap page. A set bit means
- * that all tuples on the page are known visible to all transactions, and
- * therefore the page doesn't need to be vacuumed. The map is conservative in
- * the sense that we make sure that whenever a bit is set, we know the
- * condition is true, but if a bit is not set, it might or might not be true.
+ * The visibility map is a bitmap with two bits (all-visible and all-frozen)
+ * per heap page. A set all-visible bit means that all tuples on the page are
+ * known visible to all transactions, and therefore the page doesn't need to
+ * be vacuumed. A set all-frozen bit means that all tuples on the page are
+ * completely frozen, and therefor the page doesn't need to be vacuumed even
+ * if whole table scanning vacuum is required. A all-frozen bit must be set only
+ * when the page is already all-visible. That is, all-frozen bit is always set
+ * with all-visible bit.
+ * The map is conservative in the sense that we make sure that whenever a bit
+ * is set, we know the condition is true, but if a bit is not set, it might or
+ * might not be true.
*
* Clearing a visibility map bit is not separately WAL-logged. The callers
* must make sure that whenever a bit is cleared, the bit is cleared on WAL
- * replay of the updating operation as well.
+ * replay of the updating operation as well. And all-frozen bit must be
+ * cleared with all-visible at the same time.
*
* When we *set* a visibility map during VACUUM, we must write WAL. This may
* seem counterintuitive, since the bit is basically a hint: if it is clear,
- * it may still be the case that every tuple on the page is visible to all
- * transactions; we just don't know that for certain. The difficulty is that
- * there are two bits which are typically set together: the PD_ALL_VISIBLE bit
- * on the page itself, and the visibility map bit. If a crash occurs after the
- * visibility map page makes it to disk and before the updated heap page makes
- * it to disk, redo must set the bit on the heap page. Otherwise, the next
- * insert, update, or delete on the heap page will fail to realize that the
- * visibility map bit must be cleared, possibly causing index-only scans to
- * return wrong answers.
+ * it may still be the case that every tuple on the page is visible or frozen
+ * to all transactions; we just don't know that for certain. The difficulty is
+ * that there are two bits which are typically set together: the PD_ALL_VISIBLE
+ * or PD_ALL_FROZEN bit on the page itself, and the visibility map bit. If a
+ * crash occurs after the visibility map page makes it to disk and before the
+ * updated heap page makes it to disk, redo must set the bit on the heap page.
+ * Otherwise, the next insert, update, or delete on the heap page will fail to
+ * realize that the visibility map bit must be cleared, possibly causing index-only
+ * scans to return wrong answers.
*
* VACUUM will normally skip pages for which the visibility map bit is set;
* such pages can't contain any dead tuples and therefore don't need vacuuming.
- * The visibility map is not used for anti-wraparound vacuums, because
+ * The visibility map is not used for anti-wraparound vacuums before 9.5, because
* an anti-wraparound vacuum needs to freeze tuples and observe the latest xid
* present in the table, even on pages that don't have any dead tuples.
+ * 9.6 or later, the visibility map has a additional bit which indicates all tuple
+ * on single page has been completely forzen, so the visibility map is also used for
+ * anti-wraparound vacuums.
+ *
*
* LOCKING
*
@@ -58,14 +69,14 @@
* section that logs the page modification. However, we don't want to hold
* the buffer lock over any I/O that may be required to read in the visibility
* map page. To avoid this, we examine the heap page before locking it;
- * if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map
- * bit. Then, we lock the buffer. But this creates a race condition: there
- * is a possibility that in the time it takes to lock the buffer, the
- * PD_ALL_VISIBLE bit gets set. If that happens, we have to unlock the
- * buffer, pin the visibility map page, and relock the buffer. This shouldn't
- * happen often, because only VACUUM currently sets visibility map bits,
- * and the race will only occur if VACUUM processes a given page at almost
- * exactly the same time that someone tries to further modify it.
+ * if the page-level PD_ALL_VISIBLE or PD_ALL_FROZEN bit is set, we pin the
+ * visibility map bit. Then, we lock the buffer. But this creates a race
+ * condition: there is a possibility that in the time it takes to lock the
+ * buffer, the PD_ALL_VISIBLE or PD_ALL_FROZEN bit gets set. If that happens,
+ * we have to unlock the buffer, pin the visibility map page, and relock the
+ * buffer. This shouldn't happen often, because only VACUUM currently sets
+ * visibility map bits, and the race will only occur if VACUUM processes a given
+ * page at almost exactly the same time that someone tries to further modify it.
*
* To set a bit, you need to hold a lock on the heap page. That prevents
* the race condition where VACUUM sees that all tuples on the page are
@@ -101,11 +112,14 @@
*/
#define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData))
-/* Number of bits allocated for each heap block. */
-#define BITS_PER_HEAPBLOCK 1
+/*
+ * Number of bits allocated for each heap block.
+ * One for all-visible, other for all-frozen.
+*/
+#define BITS_PER_HEAPBLOCK 2
/* Number of heap blocks we can represent in one byte. */
-#define HEAPBLOCKS_PER_BYTE 8
+#define HEAPBLOCKS_PER_BYTE 4
/* Number of heap blocks we can represent in one visibility map page. */
#define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE)
@@ -115,24 +129,42 @@
#define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE)
-/* table for fast counting of set bits */
-static const uint8 number_of_ones[256] = {
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
- 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+/* tables for fast counting of set bits for visible and freeze */
+static const uint8 number_of_ones_for_visible[256] = {
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4,
+ 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3,
+ 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4
+};
+static const uint8 number_of_ones_for_frozen[256] = {
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4,
+ 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4
};
/* prototypes for internal routines */
@@ -141,7 +173,7 @@ static void vm_extend(Relation rel, BlockNumber nvmblocks);
/*
- * visibilitymap_clear - clear a bit in visibility map
+ * visibilitymap_clear - clear all bits in visibility map
*
* You must pass a buffer containing the correct map page to this function.
* Call visibilitymap_pin first to pin the right one. This function doesn't do
@@ -153,7 +185,8 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
int mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
- uint8 mask = 1 << mapBit;
+ uint8 mask = (VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN) <<
+ (BITS_PER_HEAPBLOCK * mapBit);
char *map;
#ifdef TRACE_VISIBILITYMAP
@@ -225,7 +258,7 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
}
/*
- * visibilitymap_set - set a bit on a previously pinned page
+ * visibilitymap_set - set bit(s) on a previously pinned page
*
* recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
* or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
@@ -234,10 +267,11 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
* marked all-visible; it is needed for Hot Standby, and can be
* InvalidTransactionId if the page contains no tuples.
*
- * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling
- * this function. Except in recovery, caller should also pass the heap
- * buffer. When checksums are enabled and we're not in recovery, we must add
- * the heap buffer to the WAL chain to protect it from being torn.
+ * Caller is expected to set the heap page's PD_ALL_VISIBLE or PD_ALL_FROZEN
+ * bit before calling this function. Except in recovery, caller should also
+ * pass the heap buffer and flags which indicates what flag we want to set.
+ * When checksums are enabled and we're not in recovery, we must add the heap
+ * buffer to the WAL chain to protect it from being torn.
*
* You must pass a buffer containing the correct map page to this function.
* Call visibilitymap_pin first to pin the right one. This function doesn't do
@@ -245,7 +279,8 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
*/
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
- XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid)
+ XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
+ uint8 flags)
{
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
@@ -254,7 +289,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
char *map;
#ifdef TRACE_VISIBILITYMAP
- elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
+ elog(DEBUG1, "vm_set %s %d %u", RelationGetRelationName(rel), heapBlk, flags);
#endif
Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
@@ -272,11 +307,11 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
map = PageGetContents(page);
LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE);
- if (!(map[mapByte] & (1 << mapBit)))
+ if (flags != (map[mapByte] & (flags << (BITS_PER_HEAPBLOCK * mapBit))))
{
START_CRIT_SECTION();
- map[mapByte] |= (1 << mapBit);
+ map[mapByte] |= (flags << (BITS_PER_HEAPBLOCK * mapBit));
MarkBufferDirty(vmBuf);
if (RelationNeedsWAL(rel))
@@ -285,7 +320,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
{
Assert(!InRecovery);
recptr = log_heap_visible(rel->rd_node, heapBuf, vmBuf,
- cutoff_xid);
+ cutoff_xid, flags);
/*
* If data checksums are enabled (or wal_log_hints=on), we
@@ -295,11 +330,15 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
{
Page heapPage = BufferGetPage(heapBuf);
- /* caller is expected to set PD_ALL_VISIBLE first */
- Assert(PageIsAllVisible(heapPage));
+ /*
+ * caller is expected to set PD_ALL_VISIBLE or
+ * PD_ALL_FROZEN first.
+ */
+ Assert(PageIsAllVisible(heapPage) || PageIsAllFrozen(heapPage));
PageSetLSN(heapPage, recptr);
}
}
+
PageSetLSN(page, recptr);
}
@@ -310,15 +349,16 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
}
/*
- * visibilitymap_test - test if a bit is set
+ * visibilitymap_test - test if bit(s) is set
*
- * Are all tuples on heapBlk visible to all, according to the visibility map?
+ * Are all tuples on heapBlk visible or frozen to all, according to the visibility map?
*
* On entry, *buf should be InvalidBuffer or a valid buffer returned by an
* earlier call to visibilitymap_pin or visibilitymap_test on the same
* relation. On return, *buf is a valid buffer with the map page containing
* the bit for heapBlk, or InvalidBuffer. The caller is responsible for
- * releasing *buf after it's done testing and setting bits.
+ * releasing *buf after it's done testing and setting bits, and must set flags
+ * which indicates what flag we want to test.
*
* NOTE: This function is typically called without a lock on the heap page,
* so somebody else could change the bit just after we look at it. In fact,
@@ -328,7 +368,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
* all concurrency issues!
*/
bool
-visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
+visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf, uint8 flags)
{
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
@@ -337,7 +377,7 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
char *map;
#ifdef TRACE_VISIBILITYMAP
- elog(DEBUG1, "vm_test %s %d", RelationGetRelationName(rel), heapBlk);
+ elog(DEBUG1, "vm_test %s %d %u", RelationGetRelationName(rel), heapBlk, flags);
#endif
/* Reuse the old pinned buffer if possible */
@@ -360,11 +400,12 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
map = PageGetContents(BufferGetPage(*buf));
/*
- * A single-bit read is atomic. There could be memory-ordering effects
+ * A single or double bit read is atomic. There could be memory-ordering effects
* here, but for performance reasons we make it the caller's job to worry
* about that.
*/
- result = (map[mapByte] & (1 << mapBit)) ? true : false;
+ result = (map[mapByte] & (flags << (BITS_PER_HEAPBLOCK * mapBit))) ?
+ true : false;
return result;
}
@@ -374,10 +415,11 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf)
*
* Note: we ignore the possibility of race conditions when the table is being
* extended concurrently with the call. New pages added to the table aren't
- * going to be marked all-visible, so they won't affect the result.
+ * going to be marked all-visible or all-frozen, so they won't affect the result.
+ * The caller must set the flags which indicates what flag we want to count.
*/
BlockNumber
-visibilitymap_count(Relation rel)
+visibilitymap_count(Relation rel, uint8 flags)
{
BlockNumber result = 0;
BlockNumber mapBlock;
@@ -406,7 +448,10 @@ visibilitymap_count(Relation rel)
for (i = 0; i < MAPSIZE; i++)
{
- result += number_of_ones[map[i]];
+ if (flags & VISIBILITYMAP_ALL_VISIBLE)
+ result += number_of_ones_for_visible[map[i]];
+ if (flags & VISIBILITYMAP_ALL_FROZEN)
+ result += number_of_ones_for_frozen[map[i]];
}
ReleaseBuffer(mapBuffer);
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 4246554..015bfb8 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -1919,11 +1919,18 @@ index_update_stats(Relation rel,
{
BlockNumber relpages = RelationGetNumberOfBlocks(rel);
BlockNumber relallvisible;
+ BlockNumber relallfrozen;
if (rd_rel->relkind != RELKIND_INDEX)
- relallvisible = visibilitymap_count(rel);
+ {
+ relallvisible = visibilitymap_count(rel, VISIBILITYMAP_ALL_VISIBLE);
+ relallfrozen = visibilitymap_count(rel, VISIBILITYMAP_ALL_FROZEN);
+ }
else /* don't bother for indexes */
+ {
relallvisible = 0;
+ relallfrozen = 0;
+ }
if (rd_rel->relpages != (int32) relpages)
{
@@ -1940,6 +1947,11 @@ index_update_stats(Relation rel,
rd_rel->relallvisible = (int32) relallvisible;
dirty = true;
}
+ if (rd_rel->relallfrozen != (int32) relallfrozen)
+ {
+ rd_rel->relallfrozen = (int32) relallfrozen;
+ dirty = true;
+ }
}
/*
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 861048f..392c2a4 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -572,7 +572,8 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
vac_update_relstats(onerel,
relpages,
totalrows,
- visibilitymap_count(onerel),
+ visibilitymap_count(onerel, VISIBILITYMAP_ALL_VISIBLE),
+ visibilitymap_count(onerel, VISIBILITYMAP_ALL_FROZEN),
hasindex,
InvalidTransactionId,
InvalidMultiXactId,
@@ -595,6 +596,7 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params,
RelationGetNumberOfBlocks(Irel[ind]),
totalindexrows,
0,
+ 0,
false,
InvalidTransactionId,
InvalidMultiXactId,
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 7ab4874..d3725dd 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -22,6 +22,7 @@
#include "access/rewriteheap.h"
#include "access/transam.h"
#include "access/tuptoaster.h"
+#include "access/visibilitymap.h"
#include "access/xact.h"
#include "access/xlog.h"
#include "catalog/catalog.h"
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index baf66f1..d68c7c4 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -744,6 +744,7 @@ void
vac_update_relstats(Relation relation,
BlockNumber num_pages, double num_tuples,
BlockNumber num_all_visible_pages,
+ BlockNumber num_all_frozen_pages,
bool hasindex, TransactionId frozenxid,
MultiXactId minmulti,
bool in_outer_xact)
@@ -781,6 +782,11 @@ vac_update_relstats(Relation relation,
pgcform->relallvisible = (int32) num_all_visible_pages;
dirty = true;
}
+ if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
+ {
+ pgcform->relallfrozen = (int32) num_all_frozen_pages;
+ dirty = true;
+ }
/* Apply DDL updates, but not inside an outer transaction (see above) */
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
index a01cfb4..12322a4 100644
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -106,6 +106,8 @@ typedef struct LVRelStats
BlockNumber rel_pages; /* total number of pages */
BlockNumber scanned_pages; /* number of pages we examined */
BlockNumber pinskipped_pages; /* # of pages we skipped due to a pin */
+ BlockNumber vmskipped_frozen_pages; /* # of pages we skipped by all-frozen bit
+ of visibility map */
double scanned_tuples; /* counts only tuples on scanned pages */
double old_rel_tuples; /* previous value of pg_class.reltuples */
double new_rel_tuples; /* new estimated total # of tuples */
@@ -156,7 +158,7 @@ static void lazy_record_dead_tuple(LVRelStats *vacrelstats,
static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
static int vac_cmp_itemptr(const void *left, const void *right);
static bool heap_page_is_all_visible(Relation rel, Buffer buf,
- TransactionId *visibility_cutoff_xid);
+ TransactionId *visibility_cutoff_xid, bool *all_frozen);
/*
@@ -188,7 +190,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
MultiXactId mxactFullScanLimit;
BlockNumber new_rel_pages;
double new_rel_tuples;
- BlockNumber new_rel_allvisible;
+ BlockNumber new_rel_allvisible,
+ new_rel_allfrozen;
double new_live_tuples;
TransactionId new_frozen_xid;
MultiXactId new_min_multi;
@@ -222,6 +225,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
* than or equal to the requested Xid full-table scan limit; or if the
* table's minimum MultiXactId is older than or equal to the requested
* mxid full-table scan limit.
+ * Even if scan_all is set so far, we could skip to scan some pages
+ * according by frozen map.
*/
scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid,
xidFullScanLimit);
@@ -253,13 +258,16 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
* NB: We need to check this before truncating the relation, because that
* will change ->rel_pages.
*/
- if (vacrelstats->scanned_pages < vacrelstats->rel_pages)
+ if ((vacrelstats->scanned_pages + vacrelstats->vmskipped_frozen_pages)
+ < vacrelstats->rel_pages)
{
Assert(!scan_all);
scanned_all = false;
}
else
+ {
scanned_all = true;
+ }
/*
* Optionally truncate the relation.
@@ -301,10 +309,14 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
new_rel_tuples = vacrelstats->old_rel_tuples;
}
- new_rel_allvisible = visibilitymap_count(onerel);
+ new_rel_allvisible = visibilitymap_count(onerel, VISIBILITYMAP_ALL_VISIBLE);
if (new_rel_allvisible > new_rel_pages)
new_rel_allvisible = new_rel_pages;
+ new_rel_allfrozen = visibilitymap_count(onerel, VISIBILITYMAP_ALL_FROZEN);
+ if (new_rel_allfrozen > new_rel_pages)
+ new_rel_allfrozen = new_rel_pages;
+
new_frozen_xid = scanned_all ? FreezeLimit : InvalidTransactionId;
new_min_multi = scanned_all ? MultiXactCutoff : InvalidMultiXactId;
@@ -312,6 +324,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
new_rel_pages,
new_rel_tuples,
new_rel_allvisible,
+ new_rel_allfrozen,
vacrelstats->hasindex,
new_frozen_xid,
new_min_multi,
@@ -360,10 +373,11 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
get_namespace_name(RelationGetNamespace(onerel)),
RelationGetRelationName(onerel),
vacrelstats->num_index_scans);
- appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins\n"),
+ appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u skipped frozen page according to visibility map\n"),
vacrelstats->pages_removed,
vacrelstats->rel_pages,
- vacrelstats->pinskipped_pages);
+ vacrelstats->pinskipped_pages,
+ vacrelstats->vmskipped_frozen_pages);
appendStringInfo(&buf,
_("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable\n"),
vacrelstats->tuples_deleted,
@@ -486,9 +500,12 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* consecutive pages. Since we're reading sequentially, the OS should be
* doing readahead for us, so there's no gain in skipping a page now and
* then; that's likely to disable readahead and so be counterproductive.
- * Also, skipping even a single page means that we can't update
- * relfrozenxid, so we only want to do it if we can skip a goodly number
- * of pages.
+ * Also, skipping even a single page accorinding to all-visible bit of
+ * visibility map means that we can't update relfrozenxid, so we only want
+ * to do it if we can skip a goodly number. On the other hand, we count
+ * both how many pages we skipped according to all-frozen bit of visibility
+ * map and how many pages we freeze page, so we can update relfrozenxid if
+ * the sum of them is as many as pages of table.
*
* Before entering the main loop, establish the invariant that
* next_not_all_visible_block is the next block number >= blkno that's not
@@ -515,7 +532,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
next_not_all_visible_block < nblocks;
next_not_all_visible_block++)
{
- if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer))
+ if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer,
+ VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN))
break;
vacuum_delay_point();
}
@@ -533,7 +551,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
bool tupgone,
hastup;
int prev_dead_count;
- int nfrozen;
+ int nfrozen; /* # of tuples is frozen */
+ int nalready_frozen; /* # of tuples is already frozen */
+ int ntotal_frozen; /* # of tuples is in single page */
+ int ntup_per_page;
Size freespace;
bool all_visible_according_to_vm;
bool all_visible;
@@ -548,7 +569,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
next_not_all_visible_block++)
{
if (!visibilitymap_test(onerel, next_not_all_visible_block,
- &vmbuffer))
+ &vmbuffer,
+ VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN))
break;
vacuum_delay_point();
}
@@ -566,9 +588,25 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
}
else
{
- /* Current block is all-visible */
- if (skipping_all_visible_blocks && !scan_all)
- continue;
+ /*
+ * This block is at least all-visible according to visibility map.
+ * We check whehter this block is all-frozen to skip to vacuum this
+ * page even though scanning whole page is required.
+ */
+ if (scan_all)
+ {
+ if (visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_FROZEN))
+ {
+ vacrelstats->vmskipped_frozen_pages++;
+ continue;
+ }
+ }
+ else
+ {
+ if (skipping_all_visible_blocks)
+ continue;
+ }
+
all_visible_according_to_vm = true;
}
@@ -740,7 +778,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
PageSetAllVisible(page);
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
- vmbuffer, InvalidTransactionId);
+ vmbuffer, InvalidTransactionId,
+ VISIBILITYMAP_ALL_VISIBLE);
END_CRIT_SECTION();
}
@@ -764,6 +803,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
all_visible = true;
has_dead_tuples = false;
nfrozen = 0;
+ nalready_frozen = 0;
+ ntup_per_page = 0;
hastup = false;
prev_dead_count = vacrelstats->num_dead_tuples;
maxoff = PageGetMaxOffsetNumber(page);
@@ -918,8 +959,13 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
else
{
num_tuples += 1;
+ ntup_per_page += 1;;
hastup = true;
+ /* Check whether this tuple is alrady frozen or not */
+ if (HeapTupleHeaderXminFrozen(tuple.t_data))
+ nalready_frozen += 1;
+
/*
* Each non-removable tuple must be checked to see if it needs
* freezing. Note we already have exclusive buffer lock.
@@ -931,9 +977,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
} /* scan along page */
/*
- * If we froze any tuples, mark the buffer dirty, and write a WAL
- * record recording the changes. We must log the changes to be
- * crash-safe against future truncation of CLOG.
+ * If we froze any tuples or any tuples are already frozen,
+ * mark the buffer dirty, and write a WAL record recording the changes.
+ * We must log the changes to be crash-safe against future truncation
+ * of CLOG.
*/
if (nfrozen > 0)
{
@@ -966,6 +1013,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
END_CRIT_SECTION();
}
+ /* Compute the number of frozen tuples in a page */
+ ntotal_frozen = nfrozen + nalready_frozen;
+
/*
* If there are no indexes then we can vacuum the page right now
* instead of doing a second scan.
@@ -988,26 +1038,47 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
freespace = PageGetHeapFreeSpace(page);
- /* mark page all-visible, if appropriate */
- if (all_visible && !all_visible_according_to_vm)
+ /* This page is all visible */
+ if (all_visible)
{
- /*
- * It should never be the case that the visibility map page is set
- * while the page-level bit is clear, but the reverse is allowed
- * (if checksums are not enabled). Regardless, set the both bits
- * so that we get back in sync.
- *
- * NB: If the heap page is all-visible but the VM bit is not set,
- * we don't need to dirty the heap page. However, if checksums
- * are enabled, we do need to make sure that the heap page is
- * dirtied before passing it to visibilitymap_set(), because it
- * may be logged. Given that this situation should only happen in
- * rare cases after a crash, it is not worth optimizing.
- */
- PageSetAllVisible(page);
- MarkBufferDirty(buf);
- visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
- vmbuffer, visibility_cutoff_xid);
+ uint8 flags = 0;
+
+ /* mark page all-visible, if appropriate */
+ if (!all_visible_according_to_vm)
+ {
+ /*
+ * It should never be the case that the visibility map page is set
+ * while the page-level bit is clear, but the reverse is allowed
+ * (if checksums are not enabled). Regardless, set the both bits
+ * so that we get back in sync.
+ *
+ * NB: If the heap page is all-visible but the VM bit is not set,
+ * we don't need to dirty the heap page. However, if checksums
+ * are enabled, we do need to make sure that the heap page is
+ * dirtied before passing it to visibilitymap_set(), because it
+ * may be logged. Given that this situation should only happen in
+ * rare cases after a crash, it is not worth optimizing.
+ */
+ PageSetAllVisible(page);
+ flags |= VISIBILITYMAP_ALL_VISIBLE;
+ }
+
+ /* mark page all-frozen, if all tuples are frozen in total */
+ if ((ntotal_frozen == ntup_per_page) &&
+ !visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_FROZEN))
+ {
+ Assert(PageIsAllVisible(page));
+
+ PageSetAllFrozen(page);
+ flags |= VISIBILITYMAP_ALL_FROZEN;
+ }
+
+ if (flags)
+ {
+ MarkBufferDirty(buf);
+ visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
+ vmbuffer, visibility_cutoff_xid, flags);
+ }
}
/*
@@ -1018,7 +1089,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* that something bad has happened.
*/
else if (all_visible_according_to_vm && !PageIsAllVisible(page)
- && visibilitymap_test(onerel, blkno, &vmbuffer))
+ && visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
{
elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u",
relname, blkno);
@@ -1047,6 +1118,17 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
visibilitymap_clear(onerel, blkno, vmbuffer);
}
+ /*
+ * As a result of scanning a page, we set VM all-frozen bit and page header
+ * if all tuples of single page are frozen.
+ */
+ if (ntotal_frozen == ntup_per_page)
+ {
+ PageSetAllFrozen(page);
+ visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, vmbuffer,
+ InvalidTransactionId, VISIBILITYMAP_ALL_FROZEN);
+ }
+
UnlockReleaseBuffer(buf);
/* Remember the location of the last page with nonremovable tuples */
@@ -1078,7 +1160,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
num_tuples);
/*
- * Release any remaining pin on visibility map page.
+ * Release any remaining pin on visibility map and frozen map page.
*/
if (BufferIsValid(vmbuffer))
{
@@ -1126,6 +1208,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
nunused);
appendStringInfo(&buf, _("Skipped %u pages due to buffer pins.\n"),
vacrelstats->pinskipped_pages);
+ appendStringInfo(&buf, _("Skipped %u frozen pages according to visibility map.\n"),
+ vacrelstats->vmskipped_frozen_pages);
appendStringInfo(&buf, _("%u pages are entirely empty.\n"),
empty_pages);
appendStringInfo(&buf, _("%s."),
@@ -1226,6 +1310,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
OffsetNumber unused[MaxOffsetNumber];
int uncnt = 0;
TransactionId visibility_cutoff_xid;
+ bool all_frozen;
START_CRIT_SECTION();
@@ -1277,19 +1362,31 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
* dirty, exclusively locked, and, if needed, a full page image has been
* emitted in the log_heap_clean() above.
*/
- if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid))
+ if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid, &all_frozen))
PageSetAllVisible(page);
/*
* All the changes to the heap page have been done. If the all-visible
- * flag is now set, also set the VM bit.
+ * flag is now set, also set the VM all-visible bit.
+ * Also, if this page is all-frozen, set VM all-frozen bit and flag.
*/
- if (PageIsAllVisible(page) &&
- !visibilitymap_test(onerel, blkno, vmbuffer))
+ if (PageIsAllVisible(page))
{
- Assert(BufferIsValid(*vmbuffer));
- visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
- visibility_cutoff_xid);
+ uint8 flags = 0;
+
+ if (!visibilitymap_test(onerel, blkno, vmbuffer, VISIBILITYMAP_ALL_VISIBLE))
+ flags |= VISIBILITYMAP_ALL_VISIBLE;
+
+ /* mark page all-frozen, and set VM all-frozen bit */
+ if (all_frozen)
+ {
+ PageSetAllFrozen(page);
+ flags |= VISIBILITYMAP_ALL_FROZEN;
+ }
+
+ if (flags)
+ visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer,
+ visibility_cutoff_xid, flags);
}
return tupindex;
@@ -1408,6 +1505,7 @@ lazy_cleanup_index(Relation indrel,
stats->num_pages,
stats->num_index_tuples,
0,
+ 0,
false,
InvalidTransactionId,
InvalidMultiXactId,
@@ -1782,7 +1880,8 @@ vac_cmp_itemptr(const void *left, const void *right)
* xmin amongst the visible tuples.
*/
static bool
-heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid)
+heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid,
+ bool *all_frozen)
{
Page page = BufferGetPage(buf);
BlockNumber blockno = BufferGetBlockNumber(buf);
@@ -1791,6 +1890,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
bool all_visible = true;
*visibility_cutoff_xid = InvalidTransactionId;
+ *all_frozen = true;
/*
* This is a stripped down version of the line pointer scan in
@@ -1814,7 +1914,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
/*
* Dead line pointers can have index pointers pointing to them. So
- * they can't be treated as visible
+ * they can't be treated as visible and frozen.
*/
if (ItemIdIsDead(itemid))
{
@@ -1855,6 +1955,10 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
/* Track newest xmin on page. */
if (TransactionIdFollows(xmin, *visibility_cutoff_xid))
*visibility_cutoff_xid = xmin;
+
+ /* Check whether this tuple is alrady frozen or not */
+ if (!HeapTupleHeaderXminFrozen(tuple.t_data))
+ *all_frozen = false;
}
break;
@@ -1863,6 +1967,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut
case HEAPTUPLE_INSERT_IN_PROGRESS:
case HEAPTUPLE_DELETE_IN_PROGRESS:
all_visible = false;
+ *all_frozen = false;
break;
default:
diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c
index 9f54c46..08df289 100644
--- a/src/backend/executor/nodeIndexonlyscan.c
+++ b/src/backend/executor/nodeIndexonlyscan.c
@@ -116,7 +116,7 @@ IndexOnlyNext(IndexOnlyScanState *node)
*/
if (!visibilitymap_test(scandesc->heapRelation,
ItemPointerGetBlockNumber(tid),
- &node->ioss_VMBuffer))
+ &node->ioss_VMBuffer, VISIBILITYMAP_ALL_VISIBLE))
{
/*
* Rats, we have to visit the heap to check visibility.
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c
index 874ca6a..376841a 100644
--- a/src/backend/executor/nodeModifyTable.c
+++ b/src/backend/executor/nodeModifyTable.c
@@ -127,7 +127,7 @@ ExecCheckPlanOutput(Relation resultRel, List *targetList)
if (attno != resultDesc->natts)
ereport(ERROR,
(errcode(ERRCODE_DATATYPE_MISMATCH),
- errmsg("table row type and query-specified row type do not match"),
+ errmsg("table row type and query-specified row type do not match"),
errdetail("Query has too few columns.")));
}
diff --git a/src/common/relpath.c b/src/common/relpath.c
index 66dfef1..5898f1b 100644
--- a/src/common/relpath.c
+++ b/src/common/relpath.c
@@ -30,11 +30,14 @@
* If you add a new entry, remember to update the errhint in
* forkname_to_number() below, and update the SGML documentation for
* pg_relation_size().
+ * 9.6 or later, the visibility map fork name is changed from "vm" to
+ * "vfm" bacause visibility map has not only information about all-visible
+ * but also information about all-frozen.
*/
const char *const forkNames[] = {
"main", /* MAIN_FORKNUM */
"fsm", /* FSM_FORKNUM */
- "vm", /* VISIBILITYMAP_FORKNUM */
+ "vfm", /* VISIBILITYMAP_FORKNUM */
"init" /* INIT_FORKNUM */
};
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index caa0f14..93afb10 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -320,9 +320,10 @@ typedef struct xl_heap_freeze_page
typedef struct xl_heap_visible
{
TransactionId cutoff_xid;
+ uint8 flags;
} xl_heap_visible;
-#define SizeOfHeapVisible (offsetof(xl_heap_visible, cutoff_xid) + sizeof(TransactionId))
+#define SizeOfHeapVisible (offsetof(xl_heap_visible, flags) + sizeof(uint8))
typedef struct xl_heap_new_cid
{
@@ -389,6 +390,6 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple,
extern void heap_execute_freeze_tuple(HeapTupleHeader tuple,
xl_heap_freeze_tuple *xlrec_tp);
extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
- Buffer vm_buffer, TransactionId cutoff_xid);
+ Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags);
#endif /* HEAPAM_XLOG_H */
diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h
index 0c0e0ef..7270609 100644
--- a/src/include/access/visibilitymap.h
+++ b/src/include/access/visibilitymap.h
@@ -19,15 +19,20 @@
#include "storage/buf.h"
#include "utils/relcache.h"
-extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk,
- Buffer vmbuf);
+/* Flags for bit map */
+#define VISIBILITYMAP_ALL_VISIBLE 0x01
+#define VISIBILITYMAP_ALL_FROZEN 0x02
+
+extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf);
extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk,
Buffer *vmbuf);
extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf);
extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf,
- XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid);
-extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf);
-extern BlockNumber visibilitymap_count(Relation rel);
+ XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid,
+ uint8 flags);
+extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf,
+ uint8 flags);
+extern BlockNumber visibilitymap_count(Relation rel, uint8 flags);
extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks);
#endif /* VISIBILITYMAP_H */
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 44ce2b3..1645add 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201507021
+#define CATALOG_VERSION_NO 201507101
#endif
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index e526cd9..ea0f7c1 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -47,6 +47,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO
float4 reltuples; /* # of tuples (not always up-to-date) */
int32 relallvisible; /* # of all-visible blocks (not always
* up-to-date) */
+ int32 relallfrozen; /* # of all-frozen blocks (not always
+ up-to-date) */
Oid reltoastrelid; /* OID of toast table; 0 if none */
bool relhasindex; /* T if has (or has had) any indexes */
bool relisshared; /* T if shared across databases */
@@ -95,7 +97,7 @@ typedef FormData_pg_class *Form_pg_class;
* ----------------
*/
-#define Natts_pg_class 30
+#define Natts_pg_class 31
#define Anum_pg_class_relname 1
#define Anum_pg_class_relnamespace 2
#define Anum_pg_class_reltype 3
@@ -107,25 +109,26 @@ typedef FormData_pg_class *Form_pg_class;
#define Anum_pg_class_relpages 9
#define Anum_pg_class_reltuples 10
#define Anum_pg_class_relallvisible 11
-#define Anum_pg_class_reltoastrelid 12
-#define Anum_pg_class_relhasindex 13
-#define Anum_pg_class_relisshared 14
-#define Anum_pg_class_relpersistence 15
-#define Anum_pg_class_relkind 16
-#define Anum_pg_class_relnatts 17
-#define Anum_pg_class_relchecks 18
-#define Anum_pg_class_relhasoids 19
-#define Anum_pg_class_relhaspkey 20
-#define Anum_pg_class_relhasrules 21
-#define Anum_pg_class_relhastriggers 22
-#define Anum_pg_class_relhassubclass 23
-#define Anum_pg_class_relrowsecurity 24
-#define Anum_pg_class_relispopulated 25
-#define Anum_pg_class_relreplident 26
-#define Anum_pg_class_relfrozenxid 27
-#define Anum_pg_class_relminmxid 28
-#define Anum_pg_class_relacl 29
-#define Anum_pg_class_reloptions 30
+#define Anum_pg_class_relallfrozen 12
+#define Anum_pg_class_reltoastrelid 13
+#define Anum_pg_class_relhasindex 14
+#define Anum_pg_class_relisshared 15
+#define Anum_pg_class_relpersistence 16
+#define Anum_pg_class_relkind 17
+#define Anum_pg_class_relnatts 18
+#define Anum_pg_class_relchecks 19
+#define Anum_pg_class_relhasoids 20
+#define Anum_pg_class_relhaspkey 21
+#define Anum_pg_class_relhasrules 22
+#define Anum_pg_class_relhastriggers 23
+#define Anum_pg_class_relhassubclass 24
+#define Anum_pg_class_relrowsecurity 25
+#define Anum_pg_class_relispopulated 26
+#define Anum_pg_class_relreplident 27
+#define Anum_pg_class_relfrozenxid 28
+#define Anum_pg_class_relminmxid 29
+#define Anum_pg_class_relacl 30
+#define Anum_pg_class_reloptions 31
/* ----------------
* initial contents of pg_class
@@ -140,13 +143,13 @@ typedef FormData_pg_class *Form_pg_class;
* Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId;
* similarly, "1" in relminmxid stands for FirstMultiXactId
*/
-DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 28 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
-DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ ));
+DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 31 0 t f f f f f t n 3 1 _null_ _null_ ));
DESCR("");
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 6fd1278..7d9b93f 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -3213,6 +3213,12 @@ DESCR("sleep until the specified time");
DATA(insert OID = 2971 ( text PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "16" _null_ _null_ _null_ _null_ _null_ booltext _null_ _null_ _null_ ));
DESCR("convert boolean to text");
+DATA(insert OID = 3298 ( pg_is_all_visible PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 16 "2205 20" _null_ _null_ _null_ _null_ _null_ pg_is_all_visible _null_ _null_ _null_ ));
+DESCR("true if the page is all visible");
+DATA(insert OID = 3299 ( pg_is_all_frozen PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 16 "2205 20" _null_ _null_ _null_ _null_ _null_ pg_is_all_frozen _null_ _null_ _null_ ));
+DESCR("true if the page is all frozen");
+
+
/* Aggregates (moved here from pg_aggregate for 7.3) */
DATA(insert OID = 2100 ( avg PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 1700 "20" _null_ _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ ));
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index e3a31af..d2bae2d 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -172,6 +172,7 @@ extern void vac_update_relstats(Relation relation,
BlockNumber num_pages,
double num_tuples,
BlockNumber num_all_visible_pages,
+ BlockNumber num_all_frozen_pages,
bool hasindex,
TransactionId frozenxid,
MultiXactId minmulti,
diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h
index a2f78ee..7bf2718 100644
--- a/src/include/storage/bufpage.h
+++ b/src/include/storage/bufpage.h
@@ -178,8 +178,10 @@ typedef PageHeaderData *PageHeader;
* tuple? */
#define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to
* everyone */
+#define PD_ALL_FROZEN 0x0008 /* all tuples on page are completely
+ frozen */
-#define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */
+#define PD_VALID_FLAG_BITS 0x000F /* OR of all valid pd_flags bits */
/*
* Page layout version number 0 is for pre-7.3 Postgres releases.
@@ -369,6 +371,13 @@ typedef PageHeaderData *PageHeader;
#define PageClearAllVisible(page) \
(((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE)
+#define PageIsAllFrozen(page) \
+ (((PageHeader) (page))->pd_flags & PD_ALL_FROZEN)
+#define PageSetAllFrozen(page) \
+ (((PageHeader) (page))->pd_flags |= PD_ALL_FROZEN)
+#define PageClearAllFrozen(page) \
+ (((PageHeader) (page))->pd_flags &= ~PD_ALL_FROZEN)
+
#define PageIsPrunable(page, oldestxmin) \
( \
AssertMacro(TransactionIdIsNormal(oldestxmin)), \
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers