On Fri, Jul 10, 2015 at 3:05 AM, Sawada Masahiko <sawada.m...@gmail.com> wrote:
>
> Also, pg_upgrade support is not done yet.
>
> TODO
> - Test case for this feature
> - pg_upgrade support.
>
I had forgotten to change the fork name of the visibility map to "vfm".
Attached is the latest v7 patch. Please review it.

Regards,

--
Sawada Masahiko
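A note for reviewers before the patch itself, since the bit layout is the core of the change: each heap page now gets two adjacent bits in the map, all-visible at the even position and all-frozen at the odd one. The standalone sketch below is illustrative only and not part of the patch (MAPSIZE here assumes the default BLCKSZ of 8192); it mirrors the addressing macros and flag arithmetic from the patched visibilitymap.c:

/*
 * Standalone sketch of the two-bits-per-heap-block addressing used by the
 * patched visibilitymap.c.  The constants mirror the patch; this demo
 * itself is illustrative only and not part of the patch.
 */
#include <stdio.h>
#include <stdint.h>

#define MAPSIZE                 8168    /* assumes BLCKSZ = 8192 */
#define BITS_PER_HEAPBLOCK      2       /* all-visible + all-frozen */
#define HEAPBLOCKS_PER_BYTE     4
#define HEAPBLOCKS_PER_PAGE     (MAPSIZE * HEAPBLOCKS_PER_BYTE)

#define HEAPBLK_TO_MAPBYTE(x)   (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE)
#define HEAPBLK_TO_MAPBIT(x)    ((x) % HEAPBLOCKS_PER_BYTE)

#define VISIBILITYMAP_ALL_VISIBLE   0x01
#define VISIBILITYMAP_ALL_FROZEN    0x02

int
main(void)
{
    uint8_t     map[MAPSIZE] = {0};
    unsigned    heapBlk = 5;
    int         mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
    int         mapBit = HEAPBLK_TO_MAPBIT(heapBlk);

    /* set both bits, as VACUUM would for an all-visible, all-frozen page */
    map[mapByte] |= (VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN)
        << (BITS_PER_HEAPBLOCK * mapBit);

    /* test only the all-frozen bit, as visibilitymap_test() now can */
    if (map[mapByte] & (VISIBILITYMAP_ALL_FROZEN << (BITS_PER_HEAPBLOCK * mapBit)))
        printf("heap block %u: map byte %d, bit pair %d, all-frozen set\n",
               heapBlk, mapByte, mapBit);
    return 0;
}

For example, heap block 5 lands in map byte 1 at bit pair 1, so the all-frozen test mask is 0x02 << 2 = 0x08.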
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c index 22c5f7a..b1b6a06 100644 --- a/contrib/pgstattuple/pgstatapprox.c +++ b/contrib/pgstattuple/pgstatapprox.c @@ -87,7 +87,7 @@ statapprox_heap(Relation rel, output_type *stat) * If the page has only visible tuples, then we can find out the free * space from the FSM and move on. */ - if (visibilitymap_test(rel, blkno, &vmbuffer)) + if (visibilitymap_test(rel, blkno, &vmbuffer, VISIBILITYMAP_ALL_VISIBLE)) { freespace = GetRecordedFreeSpace(rel, blkno); stat->tuple_len += BLCKSZ - freespace; diff --git a/src/backend/access/heap/Makefile b/src/backend/access/heap/Makefile index b83d496..806ce27 100644 --- a/src/backend/access/heap/Makefile +++ b/src/backend/access/heap/Makefile @@ -12,6 +12,7 @@ subdir = src/backend/access/heap top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global -OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o +OBJS = heapam.o hio.o pruneheap.o rewriteheap.o syncscan.o tuptoaster.o visibilitymap.o \ + heapfuncs.o include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 86a2e6b..ac74100 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -88,7 +88,7 @@ static HeapTuple heap_prepare_insert(Relation relation, HeapTuple tup, static XLogRecPtr log_heap_update(Relation reln, Buffer oldbuf, Buffer newbuf, HeapTuple oldtup, HeapTuple newtup, HeapTuple old_key_tup, - bool all_visible_cleared, bool new_all_visible_cleared); + bool all_visible_cleared, bool new_all_visible_cleared); static void HeapSatisfiesHOTandKeyUpdate(Relation relation, Bitmapset *hot_attrs, Bitmapset *key_attrs, Bitmapset *id_attrs, @@ -2131,8 +2131,9 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, CheckForSerializableConflictIn(relation, NULL, InvalidBuffer); /* - * Find buffer to insert this tuple into. If the page is all visible, - * this will also pin the requisite visibility map page. + * Find buffer to insert this tuple into. If the page is all visible + * or all frozen, this will also pin the requisite visibility map and + * frozen map page. */ buffer = RelationGetBufferForTuple(relation, heaptup->t_len, InvalidBuffer, options, bistate, @@ -2147,10 +2148,13 @@ heap_insert(Relation relation, HeapTuple tup, CommandId cid, if (PageIsAllVisible(BufferGetPage(buffer))) { all_visible_cleared = true; + + /* all-frozen information is also cleared at the same time */ PageClearAllVisible(BufferGetPage(buffer)); + PageClearAllFrozen(BufferGetPage(buffer)); + visibilitymap_clear(relation, - ItemPointerGetBlockNumber(&(heaptup->t_self)), - vmbuffer); + ItemPointerGetBlockNumber(&(heaptup->t_self)), vmbuffer); } /* @@ -2448,10 +2452,12 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, if (PageIsAllVisible(page)) { all_visible_cleared = true; + + /* all-frozen information is also cleared at the same time */ PageClearAllVisible(page); - visibilitymap_clear(relation, - BufferGetBlockNumber(buffer), - vmbuffer); + PageClearAllFrozen(page); + + visibilitymap_clear(relation, BufferGetBlockNumber(buffer), vmbuffer); } /* @@ -2495,7 +2501,9 @@ heap_multi_insert(Relation relation, HeapTuple *tuples, int ntuples, /* the rest of the scratch space is used for tuple data */ tupledata = scratchptr; - xlrec->flags = all_visible_cleared ? 
XLH_INSERT_ALL_VISIBLE_CLEARED : 0; + if (all_visible_cleared) + xlrec->flags = XLH_INSERT_ALL_VISIBLE_CLEARED; + xlrec->ntuples = nthispage; /* @@ -2731,9 +2739,9 @@ heap_delete(Relation relation, ItemPointer tid, /* * If we didn't pin the visibility map page and the page has become all - * visible while we were busy locking the buffer, we'll have to unlock and - * re-lock, to avoid holding the buffer lock across an I/O. That's a bit - * unfortunate, but hopefully shouldn't happen often. + * visible or all frozen while we were busy locking the buffer, we'll + * have to unlock and re-lock, to avoid holding the buffer lock across an + * I/O. That's a bit unfortunate, but hopefully shouldn't happen often. */ if (vmbuffer == InvalidBuffer && PageIsAllVisible(page)) { @@ -2925,12 +2933,16 @@ l1: */ PageSetPrunable(page, xid); + /* clear PD_ALL_VISIBLE and PD_ALL_FROZEN flags */ if (PageIsAllVisible(page)) { all_visible_cleared = true; + + /* all-frozen information is also cleared at the same time */ PageClearAllVisible(page); - visibilitymap_clear(relation, BufferGetBlockNumber(buffer), - vmbuffer); + PageClearAllFrozen(page); + + visibilitymap_clear(relation, BufferGetBlockNumber(buffer), vmbuffer); } /* store transaction information of xact deleting the tuple */ @@ -2961,7 +2973,9 @@ l1: if (RelationIsAccessibleInLogicalDecoding(relation)) log_heap_new_cid(relation, &tp); - xlrec.flags = all_visible_cleared ? XLH_DELETE_ALL_VISIBLE_CLEARED : 0; + if (all_visible_cleared) + xlrec.flags = XLH_DELETE_ALL_VISIBLE_CLEARED; + xlrec.infobits_set = compute_infobits(tp.t_data->t_infomask, tp.t_data->t_infomask2); xlrec.offnum = ItemPointerGetOffsetNumber(&tp.t_self); @@ -3207,7 +3221,7 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup, * in the middle of changing this, so we'll need to recheck after we have * the lock. 
*/ - if (PageIsAllVisible(page)) + if (PageIsAllVisible(page) || PageIsAllFrozen(page)) visibilitymap_pin(relation, block, &vmbuffer); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); @@ -3801,16 +3815,22 @@ l2: if (PageIsAllVisible(BufferGetPage(buffer))) { all_visible_cleared = true; + + /* all-frozen information is also cleared at the same time */ PageClearAllVisible(BufferGetPage(buffer)); - visibilitymap_clear(relation, BufferGetBlockNumber(buffer), - vmbuffer); + PageClearAllFrozen(BufferGetPage(buffer)); + + visibilitymap_clear(relation, BufferGetBlockNumber(buffer), vmbuffer); } if (newbuf != buffer && PageIsAllVisible(BufferGetPage(newbuf))) { all_visible_cleared_new = true; + + /* all-frozen information is also cleared at the same time */ PageClearAllVisible(BufferGetPage(newbuf)); - visibilitymap_clear(relation, BufferGetBlockNumber(newbuf), - vmbuffer_new); + PageClearAllFrozen(BufferGetPage(newbuf)); + + visibilitymap_clear(relation, BufferGetBlockNumber(newbuf), vmbuffer_new); } if (newbuf != buffer) @@ -6893,7 +6913,7 @@ log_heap_freeze(Relation reln, Buffer buffer, TransactionId cutoff_xid, */ XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer, - TransactionId cutoff_xid) + TransactionId cutoff_xid, uint8 vmflags) { xl_heap_visible xlrec; XLogRecPtr recptr; @@ -6903,6 +6923,7 @@ log_heap_visible(RelFileNode rnode, Buffer heap_buffer, Buffer vm_buffer, Assert(BufferIsValid(vm_buffer)); xlrec.cutoff_xid = cutoff_xid; + xlrec.flags = vmflags; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, SizeOfHeapVisible); @@ -7492,8 +7513,14 @@ heap_xlog_visible(XLogReaderState *record) * the subsequent update won't be replayed to clear the flag. */ page = BufferGetPage(buffer); - PageSetAllVisible(page); + + if (xlrec->flags & VISIBILITYMAP_ALL_VISIBLE) + PageSetAllVisible(page); + if (xlrec->flags & VISIBILITYMAP_ALL_FROZEN) + PageSetAllFrozen(page); + MarkBufferDirty(buffer); + } else if (action == BLK_RESTORED) { @@ -7544,7 +7571,7 @@ heap_xlog_visible(XLogReaderState *record) */ if (lsn > PageGetLSN(vmpage)) visibilitymap_set(reln, blkno, InvalidBuffer, lsn, vmbuffer, - xlrec->cutoff_xid); + xlrec->cutoff_xid, xlrec->flags); ReleaseBuffer(vmbuffer); FreeFakeRelcacheEntry(reln); @@ -7694,7 +7721,10 @@ heap_xlog_delete(XLogReaderState *record) PageSetPrunable(page, XLogRecGetXid(record)); if (xlrec->flags & XLH_DELETE_ALL_VISIBLE_CLEARED) + { PageClearAllVisible(page); + PageClearAllFrozen(page); + } /* Make sure there is no forward chain link in t_ctid */ htup->t_ctid = target_tid; @@ -7798,7 +7828,10 @@ heap_xlog_insert(XLogReaderState *record) PageSetLSN(page, lsn); if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) + { PageClearAllVisible(page); + PageClearAllFrozen(page); + } MarkBufferDirty(buffer); } @@ -7937,7 +7970,10 @@ heap_xlog_multi_insert(XLogReaderState *record) PageSetLSN(page, lsn); if (xlrec->flags & XLH_INSERT_ALL_VISIBLE_CLEARED) + { PageClearAllVisible(page); + PageClearAllFrozen(page); + } MarkBufferDirty(buffer); } @@ -8065,7 +8101,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) PageSetPrunable(page, XLogRecGetXid(record)); if (xlrec->flags & XLH_UPDATE_OLD_ALL_VISIBLE_CLEARED) + { PageClearAllVisible(page); + PageClearAllFrozen(page); + } PageSetLSN(page, lsn); MarkBufferDirty(obuffer); @@ -8200,7 +8239,10 @@ heap_xlog_update(XLogReaderState *record, bool hot_update) elog(PANIC, "heap_update_redo: failed to add tuple"); if (xlrec->flags & XLH_UPDATE_NEW_ALL_VISIBLE_CLEARED) + { PageClearAllVisible(page); + 
PageClearAllFrozen(page); + } freespace = PageGetHeapFreeSpace(page); /* needed to update FSM below */ diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c index 7c38772..13ad5b1 100644 --- a/src/backend/access/heap/visibilitymap.c +++ b/src/backend/access/heap/visibilitymap.c @@ -21,33 +21,44 @@ * * NOTES * - * The visibility map is a bitmap with one bit per heap page. A set bit means - * that all tuples on the page are known visible to all transactions, and - * therefore the page doesn't need to be vacuumed. The map is conservative in - * the sense that we make sure that whenever a bit is set, we know the - * condition is true, but if a bit is not set, it might or might not be true. + * The visibility map is a bitmap with two bits (all-visible and all-frozen) + * per heap page. A set all-visible bit means that all tuples on the page are + * known visible to all transactions, and therefore the page doesn't need to + * be vacuumed. A set all-frozen bit means that all tuples on the page are + * completely frozen, and therefore the page doesn't need to be vacuumed even + * if a whole-table-scanning vacuum is required. The all-frozen bit may be set + * only when the page is already all-visible; that is, the all-frozen bit is + * only ever set together with the all-visible bit. + * The map is conservative in the sense that we make sure that whenever a bit + * is set, we know the condition is true, but if a bit is not set, it might or + * might not be true. * * Clearing a visibility map bit is not separately WAL-logged. The callers * must make sure that whenever a bit is cleared, the bit is cleared on WAL - * replay of the updating operation as well. + * replay of the updating operation as well. And the all-frozen bit must be + * cleared together with the all-visible bit. * * When we *set* a visibility map during VACUUM, we must write WAL. This may * seem counterintuitive, since the bit is basically a hint: if it is clear, - * it may still be the case that every tuple on the page is visible to all - * transactions; we just don't know that for certain. The difficulty is that - * there are two bits which are typically set together: the PD_ALL_VISIBLE bit - * on the page itself, and the visibility map bit. If a crash occurs after the - * visibility map page makes it to disk and before the updated heap page makes - * it to disk, redo must set the bit on the heap page. Otherwise, the next - * insert, update, or delete on the heap page will fail to realize that the - * visibility map bit must be cleared, possibly causing index-only scans to - * return wrong answers. + * it may still be the case that every tuple on the page is visible or frozen + * to all transactions; we just don't know that for certain. The difficulty is + * that there are two bits which are typically set together: the PD_ALL_VISIBLE + * or PD_ALL_FROZEN bit on the page itself, and the visibility map bit. If a + * crash occurs after the visibility map page makes it to disk and before the + * updated heap page makes it to disk, redo must set the bit on the heap page. + * Otherwise, the next insert, update, or delete on the heap page will fail to + * realize that the visibility map bit must be cleared, possibly causing index-only + * scans to return wrong answers. * * VACUUM will normally skip pages for which the visibility map bit is set; * such pages can't contain any dead tuples and therefore don't need vacuuming. 
- * The visibility map is not used for anti-wraparound vacuums before 9.6, because * an anti-wraparound vacuum needs to freeze tuples and observe the latest xid * present in the table, even on pages that don't have any dead tuples. + * In 9.6 or later, the visibility map has an additional bit which indicates that + * all tuples on a single page have been completely frozen, so the visibility map + * is also used for anti-wraparound vacuums. + * * LOCKING * @@ -58,14 +69,14 @@ * section that logs the page modification. However, we don't want to hold * the buffer lock over any I/O that may be required to read in the visibility * map page. To avoid this, we examine the heap page before locking it; - * if the page-level PD_ALL_VISIBLE bit is set, we pin the visibility map - * bit. Then, we lock the buffer. But this creates a race condition: there - * is a possibility that in the time it takes to lock the buffer, the - * PD_ALL_VISIBLE bit gets set. If that happens, we have to unlock the - * buffer, pin the visibility map page, and relock the buffer. This shouldn't - * happen often, because only VACUUM currently sets visibility map bits, - * and the race will only occur if VACUUM processes a given page at almost - * exactly the same time that someone tries to further modify it. + * if the page-level PD_ALL_VISIBLE or PD_ALL_FROZEN bit is set, we pin the + * visibility map bit. Then, we lock the buffer. But this creates a race + * condition: there is a possibility that in the time it takes to lock the + * buffer, the PD_ALL_VISIBLE or PD_ALL_FROZEN bit gets set. If that happens, + * we have to unlock the buffer, pin the visibility map page, and relock the + * buffer. This shouldn't happen often, because only VACUUM currently sets + * visibility map bits, and the race will only occur if VACUUM processes a given + * page at almost exactly the same time that someone tries to further modify it. * * To set a bit, you need to hold a lock on the heap page. That prevents * the race condition where VACUUM sees that all tuples on the page are @@ -101,11 +112,14 @@ */ #define MAPSIZE (BLCKSZ - MAXALIGN(SizeOfPageHeaderData)) -/* Number of bits allocated for each heap block. */ -#define BITS_PER_HEAPBLOCK 1 +/* + * Number of bits allocated for each heap block. + * One for all-visible, the other for all-frozen. + */ +#define BITS_PER_HEAPBLOCK 2 /* Number of heap blocks we can represent in one byte. */ -#define HEAPBLOCKS_PER_BYTE 8 +#define HEAPBLOCKS_PER_BYTE 4 /* Number of heap blocks we can represent in one visibility map page. 
*/ #define HEAPBLOCKS_PER_PAGE (MAPSIZE * HEAPBLOCKS_PER_BYTE) @@ -115,24 +129,42 @@ #define HEAPBLK_TO_MAPBYTE(x) (((x) % HEAPBLOCKS_PER_PAGE) / HEAPBLOCKS_PER_BYTE) #define HEAPBLK_TO_MAPBIT(x) ((x) % HEAPBLOCKS_PER_BYTE) -/* table for fast counting of set bits */ -static const uint8 number_of_ones[256] = { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +/* tables for fast counting of set bits for visible and freeze */ +static const uint8 number_of_ones_for_visible[256] = { + 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, + 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, + 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, + 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, + 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, + 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, + 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, + 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, + 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, + 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, + 0, 1, 0, 1, 1, 2, 1, 2, 0, 1, 0, 1, 1, 2, 1, 2, + 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, + 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, + 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4, + 1, 2, 1, 2, 2, 3, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, + 2, 3, 2, 3, 3, 4, 3, 4, 2, 3, 2, 3, 3, 4, 3, 4 +}; +static const uint8 number_of_ones_for_frozen[256] = { + 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, + 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, + 0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, + 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 1, 1, 2, 2, 1, 1, 2, 2, 2, 2, 3, 3, 2, 2, 3, 3, + 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4, + 2, 2, 3, 3, 2, 2, 3, 3, 3, 3, 4, 4, 3, 3, 4, 4 }; /* prototypes for internal routines */ @@ -141,7 +173,7 @@ static void vm_extend(Relation rel, BlockNumber nvmblocks); /* - * visibilitymap_clear - clear a bit in visibility map + * visibilitymap_clear - clear all bits in visibility map * * You must pass a buffer containing the correct map page to this function. * Call visibilitymap_pin first to pin the right one. 
This function doesn't do @@ -153,7 +185,8 @@ visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf) BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk); int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk); int mapBit = HEAPBLK_TO_MAPBIT(heapBlk); - uint8 mask = 1 << mapBit; + uint8 mask = (VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN) << + (BITS_PER_HEAPBLOCK * mapBit); char *map; #ifdef TRACE_VISIBILITYMAP @@ -225,7 +258,7 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf) } /* - * visibilitymap_set - set a bit on a previously pinned page + * visibilitymap_set - set bit(s) on a previously pinned page * * recptr is the LSN of the XLOG record we're replaying, if we're in recovery, * or InvalidXLogRecPtr in normal running. The page LSN is advanced to the @@ -234,10 +267,11 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf) * marked all-visible; it is needed for Hot Standby, and can be * InvalidTransactionId if the page contains no tuples. * - * Caller is expected to set the heap page's PD_ALL_VISIBLE bit before calling - * this function. Except in recovery, caller should also pass the heap - * buffer. When checksums are enabled and we're not in recovery, we must add - * the heap buffer to the WAL chain to protect it from being torn. + * Caller is expected to set the heap page's PD_ALL_VISIBLE or PD_ALL_FROZEN + * bit before calling this function. Except in recovery, caller should also + * pass the heap buffer and flags indicating which bit(s) to set. + * When checksums are enabled and we're not in recovery, we must add the heap + * buffer to the WAL chain to protect it from being torn. * * You must pass a buffer containing the correct map page to this function. * Call visibilitymap_pin first to pin the right one. This function doesn't do @@ -245,7 +279,8 @@ visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf) */ void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, - XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid) + XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, + uint8 flags) { BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk); uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk); @@ -254,7 +289,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, char *map; #ifdef TRACE_VISIBILITYMAP - elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk); + elog(DEBUG1, "vm_set %s %d %u", RelationGetRelationName(rel), heapBlk, flags); #endif Assert(InRecovery || XLogRecPtrIsInvalid(recptr)); @@ -272,11 +307,11 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, map = PageGetContents(page); LockBuffer(vmBuf, BUFFER_LOCK_EXCLUSIVE); - if (!(map[mapByte] & (1 << mapBit))) + if (flags != ((map[mapByte] >> (BITS_PER_HEAPBLOCK * mapBit)) & flags)) { START_CRIT_SECTION(); - map[mapByte] |= (1 << mapBit); + map[mapByte] |= (flags << (BITS_PER_HEAPBLOCK * mapBit)); MarkBufferDirty(vmBuf); if (RelationNeedsWAL(rel)) @@ -285,7 +320,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, { Assert(!InRecovery); recptr = log_heap_visible(rel->rd_node, heapBuf, vmBuf, - cutoff_xid); + cutoff_xid, flags); /* * If data checksums are enabled (or wal_log_hints=on), we @@ -295,11 +330,15 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, { Page heapPage = BufferGetPage(heapBuf); - /* caller is expected to set PD_ALL_VISIBLE first */ - Assert(PageIsAllVisible(heapPage)); + /* + * caller is expected to set PD_ALL_VISIBLE or + * PD_ALL_FROZEN first. 
+ */ + Assert(PageIsAllVisible(heapPage) || PageIsAllFrozen(heapPage)); PageSetLSN(heapPage, recptr); } } + PageSetLSN(page, recptr); } @@ -310,15 +349,16 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, } /* - * visibilitymap_test - test if a bit is set + * visibilitymap_test - test if bit(s) are set * - * Are all tuples on heapBlk visible to all, according to the visibility map? + * Are all tuples on heapBlk visible or frozen to all transactions, according to the visibility map? * * On entry, *buf should be InvalidBuffer or a valid buffer returned by an * earlier call to visibilitymap_pin or visibilitymap_test on the same * relation. On return, *buf is a valid buffer with the map page containing * the bit for heapBlk, or InvalidBuffer. The caller is responsible for - * releasing *buf after it's done testing and setting bits. + * releasing *buf after it's done testing and setting bits, and must pass + * flags indicating which bit(s) to test. * * NOTE: This function is typically called without a lock on the heap page, * so somebody else could change the bit just after we look at it. In fact, @@ -328,7 +368,7 @@ visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, * all concurrency issues! */ bool -visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf) +visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf, uint8 flags) { BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk); uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk); @@ -337,7 +377,7 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf) char *map; #ifdef TRACE_VISIBILITYMAP - elog(DEBUG1, "vm_test %s %d", RelationGetRelationName(rel), heapBlk); + elog(DEBUG1, "vm_test %s %d %u", RelationGetRelationName(rel), heapBlk, flags); #endif /* Reuse the old pinned buffer if possible */ @@ -360,11 +400,12 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf) map = PageGetContents(BufferGetPage(*buf)); /* - * A single-bit read is atomic. There could be memory-ordering effects + * A single- or double-bit read is atomic. There could be memory-ordering effects * here, but for performance reasons we make it the caller's job to worry * about that. */ - result = (map[mapByte] & (1 << mapBit)) ? true : false; + result = (map[mapByte] & (flags << (BITS_PER_HEAPBLOCK * mapBit))) ? + true : false; return result; } @@ -374,10 +415,11 @@ visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *buf) * * Note: we ignore the possibility of race conditions when the table is being * extended concurrently with the call. New pages added to the table aren't - * going to be marked all-visible, so they won't affect the result. + * going to be marked all-visible or all-frozen, so they won't affect the result. + * The caller must pass flags indicating which bit(s) to count. 
*/ BlockNumber -visibilitymap_count(Relation rel) +visibilitymap_count(Relation rel, uint8 flags) { BlockNumber result = 0; BlockNumber mapBlock; @@ -406,7 +448,10 @@ visibilitymap_count(Relation rel) for (i = 0; i < MAPSIZE; i++) { - result += number_of_ones[map[i]]; + if (flags & VISIBILITYMAP_ALL_VISIBLE) + result += number_of_ones_for_visible[map[i]]; + if (flags & VISIBILITYMAP_ALL_FROZEN) + result += number_of_ones_for_frozen[map[i]]; } ReleaseBuffer(mapBuffer); diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 4246554..015bfb8 100644 --- a/src/backend/catalog/index.c +++ b/src/backend/catalog/index.c @@ -1919,11 +1919,18 @@ index_update_stats(Relation rel, { BlockNumber relpages = RelationGetNumberOfBlocks(rel); BlockNumber relallvisible; + BlockNumber relallfrozen; if (rd_rel->relkind != RELKIND_INDEX) - relallvisible = visibilitymap_count(rel); + { + relallvisible = visibilitymap_count(rel, VISIBILITYMAP_ALL_VISIBLE); + relallfrozen = visibilitymap_count(rel, VISIBILITYMAP_ALL_FROZEN); + } else /* don't bother for indexes */ + { relallvisible = 0; + relallfrozen = 0; + } if (rd_rel->relpages != (int32) relpages) { @@ -1940,6 +1947,11 @@ index_update_stats(Relation rel, rd_rel->relallvisible = (int32) relallvisible; dirty = true; } + if (rd_rel->relallfrozen != (int32) relallfrozen) + { + rd_rel->relallfrozen = (int32) relallfrozen; + dirty = true; + } } /* diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 861048f..392c2a4 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -572,7 +572,8 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, vac_update_relstats(onerel, relpages, totalrows, - visibilitymap_count(onerel), + visibilitymap_count(onerel, VISIBILITYMAP_ALL_VISIBLE), + visibilitymap_count(onerel, VISIBILITYMAP_ALL_FROZEN), hasindex, InvalidTransactionId, InvalidMultiXactId, @@ -595,6 +596,7 @@ do_analyze_rel(Relation onerel, int options, VacuumParams *params, RelationGetNumberOfBlocks(Irel[ind]), totalindexrows, 0, + 0, false, InvalidTransactionId, InvalidMultiXactId, diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c index 7ab4874..d3725dd 100644 --- a/src/backend/commands/cluster.c +++ b/src/backend/commands/cluster.c @@ -22,6 +22,7 @@ #include "access/rewriteheap.h" #include "access/transam.h" #include "access/tuptoaster.h" +#include "access/visibilitymap.h" #include "access/xact.h" #include "access/xlog.h" #include "catalog/catalog.h" diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index baf66f1..d68c7c4 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -744,6 +744,7 @@ void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, + BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, bool in_outer_xact) @@ -781,6 +782,11 @@ vac_update_relstats(Relation relation, pgcform->relallvisible = (int32) num_all_visible_pages; dirty = true; } + if (pgcform->relallfrozen != (int32) num_all_frozen_pages) + { + pgcform->relallfrozen = (int32) num_all_frozen_pages; + dirty = true; + } /* Apply DDL updates, but not inside an outer transaction (see above) */ diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c index a01cfb4..12322a4 100644 --- a/src/backend/commands/vacuumlazy.c +++ b/src/backend/commands/vacuumlazy.c @@ -106,6 +106,8 @@ typedef struct LVRelStats 
BlockNumber rel_pages; /* total number of pages */ BlockNumber scanned_pages; /* number of pages we examined */ BlockNumber pinskipped_pages; /* # of pages we skipped due to a pin */ + BlockNumber vmskipped_frozen_pages; /* # of pages we skipped using the + all-frozen bit of the visibility map */ double scanned_tuples; /* counts only tuples on scanned pages */ double old_rel_tuples; /* previous value of pg_class.reltuples */ double new_rel_tuples; /* new estimated total # of tuples */ @@ -156,7 +158,7 @@ static void lazy_record_dead_tuple(LVRelStats *vacrelstats, static bool lazy_tid_reaped(ItemPointer itemptr, void *state); static int vac_cmp_itemptr(const void *left, const void *right); static bool heap_page_is_all_visible(Relation rel, Buffer buf, - TransactionId *visibility_cutoff_xid); + TransactionId *visibility_cutoff_xid, bool *all_frozen); /* @@ -188,7 +190,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params, MultiXactId mxactFullScanLimit; BlockNumber new_rel_pages; double new_rel_tuples; - BlockNumber new_rel_allvisible; + BlockNumber new_rel_allvisible, + new_rel_allfrozen; double new_live_tuples; TransactionId new_frozen_xid; MultiXactId new_min_multi; @@ -222,6 +225,8 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params, * than or equal to the requested Xid full-table scan limit; or if the * table's minimum MultiXactId is older than or equal to the requested * mxid full-table scan limit. + * Even if scan_all is set, we can still skip scanning pages that are + * marked all-frozen in the visibility map. */ scan_all = TransactionIdPrecedesOrEquals(onerel->rd_rel->relfrozenxid, xidFullScanLimit); @@ -253,13 +258,16 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params, * NB: We need to check this before truncating the relation, because that * will change ->rel_pages. */ - if (vacrelstats->scanned_pages < vacrelstats->rel_pages) + if ((vacrelstats->scanned_pages + vacrelstats->vmskipped_frozen_pages) + < vacrelstats->rel_pages) { Assert(!scan_all); scanned_all = false; } else + { scanned_all = true; + } /* * Optionally truncate the relation. @@ -301,10 +309,14 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params, new_rel_tuples = vacrelstats->old_rel_tuples; } - new_rel_allvisible = visibilitymap_count(onerel); + new_rel_allvisible = visibilitymap_count(onerel, VISIBILITYMAP_ALL_VISIBLE); if (new_rel_allvisible > new_rel_pages) new_rel_allvisible = new_rel_pages; + new_rel_allfrozen = visibilitymap_count(onerel, VISIBILITYMAP_ALL_FROZEN); + if (new_rel_allfrozen > new_rel_pages) + new_rel_allfrozen = new_rel_pages; + new_frozen_xid = scanned_all ? FreezeLimit : InvalidTransactionId; new_min_multi = scanned_all ? 
MultiXactCutoff : InvalidMultiXactId; @@ -312,6 +324,7 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params, new_rel_pages, new_rel_tuples, new_rel_allvisible, + new_rel_allfrozen, vacrelstats->hasindex, new_frozen_xid, new_min_multi, @@ -360,10 +373,11 @@ lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params, get_namespace_name(RelationGetNamespace(onerel)), RelationGetRelationName(onerel), vacrelstats->num_index_scans); - appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins\n"), + appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped due to pins, %u frozen pages skipped according to visibility map\n"), vacrelstats->pages_removed, vacrelstats->rel_pages, - vacrelstats->pinskipped_pages); + vacrelstats->pinskipped_pages, + vacrelstats->vmskipped_frozen_pages); appendStringInfo(&buf, _("tuples: %.0f removed, %.0f remain, %.0f are dead but not yet removable\n"), vacrelstats->tuples_deleted, @@ -486,9 +500,12 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, * consecutive pages. Since we're reading sequentially, the OS should be * doing readahead for us, so there's no gain in skipping a page now and * then; that's likely to disable readahead and so be counterproductive. - * Also, skipping even a single page means that we can't update - * relfrozenxid, so we only want to do it if we can skip a goodly number - * of pages. + * Also, skipping even a single page according to the all-visible bit of + * the visibility map means that we can't update relfrozenxid, so we only + * want to do it if we can skip a goodly number of pages. On the other + * hand, we count both the pages we skip according to the all-frozen bit + * of the visibility map and the pages we freeze, so we can still update + * relfrozenxid if the sum of the two covers all pages of the table. * * Before entering the main loop, establish the invariant that * next_not_all_visible_block is the next block number >= blkno that's not @@ -515,7 +532,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, next_not_all_visible_block < nblocks; next_not_all_visible_block++) { - if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer)) + if (!visibilitymap_test(onerel, next_not_all_visible_block, &vmbuffer, + VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN)) break; vacuum_delay_point(); } @@ -533,7 +551,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, bool tupgone, hastup; int prev_dead_count; - int nfrozen; + int nfrozen; /* # of tuples we freeze on this page */ + int nalready_frozen; /* # of tuples already frozen */ + int ntotal_frozen; /* total # of frozen tuples on this page */ + int ntup_per_page; /* # of tuples on this page */ Size freespace; bool all_visible_according_to_vm; bool all_visible; @@ -548,7 +569,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, next_not_all_visible_block++) { if (!visibilitymap_test(onerel, next_not_all_visible_block, - &vmbuffer) + &vmbuffer, + VISIBILITYMAP_ALL_VISIBLE | VISIBILITYMAP_ALL_FROZEN)) break; vacuum_delay_point(); } @@ -566,9 +588,25 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, } else { - /* Current block is all-visible */ - if (skipping_all_visible_blocks && !scan_all) - continue; + /* + * This block is at least all-visible according to the visibility map. + * We also check whether the block is all-frozen, in which case we can + * skip vacuuming it even though a whole-table scan is required. 
+ */ + if (scan_all) + { + if (visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_FROZEN)) + { + vacrelstats->vmskipped_frozen_pages++; + continue; + } + } + else + { + if (skipping_all_visible_blocks) + continue; + } + all_visible_according_to_vm = true; } @@ -740,7 +778,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, PageSetAllVisible(page); visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, - vmbuffer, InvalidTransactionId); + vmbuffer, InvalidTransactionId, + VISIBILITYMAP_ALL_VISIBLE); END_CRIT_SECTION(); } @@ -764,6 +803,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, all_visible = true; has_dead_tuples = false; nfrozen = 0; + nalready_frozen = 0; + ntup_per_page = 0; hastup = false; prev_dead_count = vacrelstats->num_dead_tuples; maxoff = PageGetMaxOffsetNumber(page); @@ -918,8 +959,13 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, else { num_tuples += 1; + ntup_per_page += 1; hastup = true; + /* Check whether this tuple is already frozen or not */ + if (HeapTupleHeaderXminFrozen(tuple.t_data)) + nalready_frozen += 1; + /* * Each non-removable tuple must be checked to see if it needs * freezing. Note we already have exclusive buffer lock. @@ -931,9 +977,10 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, } /* scan along page */ /* - * If we froze any tuples, mark the buffer dirty, and write a WAL - * record recording the changes. We must log the changes to be - * crash-safe against future truncation of CLOG. + * If we froze any tuples or any tuples are already frozen, + * mark the buffer dirty, and write a WAL record recording the changes. + * We must log the changes to be crash-safe against future truncation + * of CLOG. */ if (nfrozen > 0) { @@ -966,6 +1013,9 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, END_CRIT_SECTION(); } + /* Compute the total number of frozen tuples on the page */ + ntotal_frozen = nfrozen + nalready_frozen; + /* * If there are no indexes then we can vacuum the page right now * instead of doing a second scan. @@ -988,26 +1038,47 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, freespace = PageGetHeapFreeSpace(page); - /* mark page all-visible, if appropriate */ - if (all_visible && !all_visible_according_to_vm) + /* This page is all visible */ + if (all_visible) { - /* - * It should never be the case that the visibility map page is set - * while the page-level bit is clear, but the reverse is allowed - * (if checksums are not enabled). Regardless, set the both bits - * so that we get back in sync. - * - * NB: If the heap page is all-visible but the VM bit is not set, - * we don't need to dirty the heap page. However, if checksums - * are enabled, we do need to make sure that the heap page is - * dirtied before passing it to visibilitymap_set(), because it - * may be logged. Given that this situation should only happen in - * rare cases after a crash, it is not worth optimizing. - */ - PageSetAllVisible(page); - MarkBufferDirty(buf); - visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, - vmbuffer, visibility_cutoff_xid); + uint8 flags = 0; + + /* mark page all-visible, if appropriate */ + if (!all_visible_according_to_vm) + { + /* + * It should never be the case that the visibility map page is set + * while the page-level bit is clear, but the reverse is allowed + * (if checksums are not enabled). Regardless, set the both bits + * so that we get back in sync. 
+ * + * NB: If the heap page is all-visible but the VM bit is not set, + * we don't need to dirty the heap page. However, if checksums + * are enabled, we do need to make sure that the heap page is + * dirtied before passing it to visibilitymap_set(), because it + * may be logged. Given that this situation should only happen in + * rare cases after a crash, it is not worth optimizing. + */ + PageSetAllVisible(page); + flags |= VISIBILITYMAP_ALL_VISIBLE; + } + + /* mark page all-frozen, if all of its tuples are frozen */ + if ((ntotal_frozen == ntup_per_page) && + !visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_FROZEN)) + { + Assert(PageIsAllVisible(page)); + + PageSetAllFrozen(page); + flags |= VISIBILITYMAP_ALL_FROZEN; + } + + if (flags) + { + MarkBufferDirty(buf); + visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, + vmbuffer, visibility_cutoff_xid, flags); + } } /* @@ -1018,7 +1089,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, * that something bad has happened. */ else if (all_visible_according_to_vm && !PageIsAllVisible(page) - && visibilitymap_test(onerel, blkno, &vmbuffer)) + && visibilitymap_test(onerel, blkno, &vmbuffer, VISIBILITYMAP_ALL_VISIBLE)) { elog(WARNING, "page is not marked all-visible but visibility map bit is set in relation \"%s\" page %u", relname, blkno); @@ -1047,6 +1118,17 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, visibilitymap_clear(onerel, blkno, vmbuffer); } + /* + * As a result of scanning the page, we set the VM all-frozen bit and the + * page-header flag if all tuples on the page are frozen. + */ + if (ntotal_frozen == ntup_per_page) + { + PageSetAllFrozen(page); + visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr, vmbuffer, + InvalidTransactionId, VISIBILITYMAP_ALL_FROZEN); + } + UnlockReleaseBuffer(buf); /* Remember the location of the last page with nonremovable tuples */ @@ -1078,7 +1160,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, num_tuples); /* - * Release any remaining pin on visibility map page. + * Release any remaining pin on visibility map and frozen map page. */ if (BufferIsValid(vmbuffer)) { @@ -1126,6 +1208,8 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats, nunused); appendStringInfo(&buf, _("Skipped %u pages due to buffer pins.\n"), vacrelstats->pinskipped_pages); + appendStringInfo(&buf, _("Skipped %u frozen pages according to visibility map.\n"), + vacrelstats->vmskipped_frozen_pages); appendStringInfo(&buf, _("%u pages are entirely empty.\n"), empty_pages); appendStringInfo(&buf, _("%s."), @@ -1226,6 +1310,7 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, OffsetNumber unused[MaxOffsetNumber]; int uncnt = 0; TransactionId visibility_cutoff_xid; + bool all_frozen; START_CRIT_SECTION(); @@ -1277,19 +1362,31 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, * dirty, exclusively locked, and, if needed, a full page image has been * emitted in the log_heap_clean() above. */ - if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid)) + if (heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid, &all_frozen)) PageSetAllVisible(page); /* * All the changes to the heap page have been done. If the all-visible - * flag is now set, also set the VM bit. + * flag is now set, also set the VM all-visible bit. + * Also, if the page is all-frozen, set the VM all-frozen bit and the + * page-header flag. 
*/ - if (PageIsAllVisible(page) && - !visibilitymap_test(onerel, blkno, vmbuffer)) + if (PageIsAllVisible(page)) { - Assert(BufferIsValid(*vmbuffer)); - visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer, - visibility_cutoff_xid); + uint8 flags = 0; + + if (!visibilitymap_test(onerel, blkno, vmbuffer, VISIBILITYMAP_ALL_VISIBLE)) + flags |= VISIBILITYMAP_ALL_VISIBLE; + + /* mark page all-frozen, and set VM all-frozen bit */ + if (all_frozen) + { + PageSetAllFrozen(page); + flags |= VISIBILITYMAP_ALL_FROZEN; + } + + if (flags) + visibilitymap_set(onerel, blkno, buffer, InvalidXLogRecPtr, *vmbuffer, + visibility_cutoff_xid, flags); } return tupindex; @@ -1408,6 +1505,7 @@ lazy_cleanup_index(Relation indrel, stats->num_pages, stats->num_index_tuples, 0, + 0, false, InvalidTransactionId, InvalidMultiXactId, @@ -1782,7 +1880,8 @@ vac_cmp_itemptr(const void *left, const void *right) * xmin amongst the visible tuples. */ static bool -heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid) +heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid, + bool *all_frozen) { Page page = BufferGetPage(buf); BlockNumber blockno = BufferGetBlockNumber(buf); @@ -1791,6 +1890,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut bool all_visible = true; *visibility_cutoff_xid = InvalidTransactionId; + *all_frozen = true; /* * This is a stripped down version of the line pointer scan in @@ -1814,7 +1914,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut /* * Dead line pointers can have index pointers pointing to them. So - * they can't be treated as visible + * they can't be treated as visible or frozen. */ if (ItemIdIsDead(itemid)) { @@ -1855,6 +1955,10 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut /* Track newest xmin on page. */ if (TransactionIdFollows(xmin, *visibility_cutoff_xid)) *visibility_cutoff_xid = xmin; + + /* Check whether this tuple is already frozen or not */ + if (!HeapTupleHeaderXminFrozen(tuple.t_data)) + *all_frozen = false; } break; @@ -1863,6 +1967,7 @@ heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cut case HEAPTUPLE_INSERT_IN_PROGRESS: case HEAPTUPLE_DELETE_IN_PROGRESS: all_visible = false; + *all_frozen = false; break; default: diff --git a/src/backend/executor/nodeIndexonlyscan.c b/src/backend/executor/nodeIndexonlyscan.c index 9f54c46..08df289 100644 --- a/src/backend/executor/nodeIndexonlyscan.c +++ b/src/backend/executor/nodeIndexonlyscan.c @@ -116,7 +116,7 @@ IndexOnlyNext(IndexOnlyScanState *node) */ if (!visibilitymap_test(scandesc->heapRelation, ItemPointerGetBlockNumber(tid), - &node->ioss_VMBuffer)) + &node->ioss_VMBuffer, VISIBILITYMAP_ALL_VISIBLE)) { /* * Rats, we have to visit the heap to check visibility. 
diff --git a/src/backend/executor/nodeModifyTable.c b/src/backend/executor/nodeModifyTable.c index 874ca6a..376841a 100644 --- a/src/backend/executor/nodeModifyTable.c +++ b/src/backend/executor/nodeModifyTable.c @@ -127,7 +127,7 @@ ExecCheckPlanOutput(Relation resultRel, List *targetList) if (attno != resultDesc->natts) ereport(ERROR, (errcode(ERRCODE_DATATYPE_MISMATCH), - errmsg("table row type and query-specified row type do not match"), + errmsg("table row type and query-specified row type do not match"), errdetail("Query has too few columns."))); } diff --git a/src/common/relpath.c b/src/common/relpath.c index 66dfef1..5898f1b 100644 --- a/src/common/relpath.c +++ b/src/common/relpath.c @@ -30,11 +30,14 @@ * If you add a new entry, remember to update the errhint in * forkname_to_number() below, and update the SGML documentation for * pg_relation_size(). + * In 9.6 and later, the visibility map fork name is changed from "vm" to + * "vfm" because the visibility map now contains not only all-visible + * information but also all-frozen information. */ const char *const forkNames[] = { "main", /* MAIN_FORKNUM */ "fsm", /* FSM_FORKNUM */ - "vm", /* VISIBILITYMAP_FORKNUM */ + "vfm", /* VISIBILITYMAP_FORKNUM */ "init" /* INIT_FORKNUM */ }; diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h index caa0f14..93afb10 100644 --- a/src/include/access/heapam_xlog.h +++ b/src/include/access/heapam_xlog.h @@ -320,9 +320,10 @@ typedef struct xl_heap_freeze_page typedef struct xl_heap_visible { TransactionId cutoff_xid; + uint8 flags; } xl_heap_visible; -#define SizeOfHeapVisible (offsetof(xl_heap_visible, cutoff_xid) + sizeof(TransactionId)) +#define SizeOfHeapVisible (offsetof(xl_heap_visible, flags) + sizeof(uint8)) typedef struct xl_heap_new_cid { @@ -389,6 +390,6 @@ extern bool heap_prepare_freeze_tuple(HeapTupleHeader tuple, extern void heap_execute_freeze_tuple(HeapTupleHeader tuple, xl_heap_freeze_tuple *xlrec_tp); extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer, - Buffer vm_buffer, TransactionId cutoff_xid); + Buffer vm_buffer, TransactionId cutoff_xid, uint8 flags); #endif /* HEAPAM_XLOG_H */ diff --git a/src/include/access/visibilitymap.h b/src/include/access/visibilitymap.h index 0c0e0ef..7270609 100644 --- a/src/include/access/visibilitymap.h +++ b/src/include/access/visibilitymap.h @@ -19,15 +19,20 @@ #include "storage/buf.h" #include "utils/relcache.h" -extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk, - Buffer vmbuf); +/* Flags for the visibility map bits */ +#define VISIBILITYMAP_ALL_VISIBLE 0x01 +#define VISIBILITYMAP_ALL_FROZEN 0x02 + +extern void visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer vmbuf); extern void visibilitymap_pin(Relation rel, BlockNumber heapBlk, Buffer *vmbuf); extern bool visibilitymap_pin_ok(BlockNumber heapBlk, Buffer vmbuf); extern void visibilitymap_set(Relation rel, BlockNumber heapBlk, Buffer heapBuf, - XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid); -extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf); -extern BlockNumber visibilitymap_count(Relation rel); + XLogRecPtr recptr, Buffer vmBuf, TransactionId cutoff_xid, + uint8 flags); +extern bool visibilitymap_test(Relation rel, BlockNumber heapBlk, Buffer *vmbuf, + uint8 flags); +extern BlockNumber visibilitymap_count(Relation rel, uint8 flags); extern void visibilitymap_truncate(Relation rel, BlockNumber nheapblocks); #endif /* VISIBILITYMAP_H */ diff --git a/src/include/catalog/catversion.h 
b/src/include/catalog/catversion.h index 44ce2b3..1645add 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201507021 +#define CATALOG_VERSION_NO 201507101 #endif diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h index e526cd9..ea0f7c1 100644 --- a/src/include/catalog/pg_class.h +++ b/src/include/catalog/pg_class.h @@ -47,6 +47,8 @@ CATALOG(pg_class,1259) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83) BKI_SCHEMA_MACRO float4 reltuples; /* # of tuples (not always up-to-date) */ int32 relallvisible; /* # of all-visible blocks (not always * up-to-date) */ + int32 relallfrozen; /* # of all-frozen blocks (not always + up-to-date) */ Oid reltoastrelid; /* OID of toast table; 0 if none */ bool relhasindex; /* T if has (or has had) any indexes */ bool relisshared; /* T if shared across databases */ @@ -95,7 +97,7 @@ typedef FormData_pg_class *Form_pg_class; * ---------------- */ -#define Natts_pg_class 30 +#define Natts_pg_class 31 #define Anum_pg_class_relname 1 #define Anum_pg_class_relnamespace 2 #define Anum_pg_class_reltype 3 @@ -107,25 +109,26 @@ typedef FormData_pg_class *Form_pg_class; #define Anum_pg_class_relpages 9 #define Anum_pg_class_reltuples 10 #define Anum_pg_class_relallvisible 11 -#define Anum_pg_class_reltoastrelid 12 -#define Anum_pg_class_relhasindex 13 -#define Anum_pg_class_relisshared 14 -#define Anum_pg_class_relpersistence 15 -#define Anum_pg_class_relkind 16 -#define Anum_pg_class_relnatts 17 -#define Anum_pg_class_relchecks 18 -#define Anum_pg_class_relhasoids 19 -#define Anum_pg_class_relhaspkey 20 -#define Anum_pg_class_relhasrules 21 -#define Anum_pg_class_relhastriggers 22 -#define Anum_pg_class_relhassubclass 23 -#define Anum_pg_class_relrowsecurity 24 -#define Anum_pg_class_relispopulated 25 -#define Anum_pg_class_relreplident 26 -#define Anum_pg_class_relfrozenxid 27 -#define Anum_pg_class_relminmxid 28 -#define Anum_pg_class_relacl 29 -#define Anum_pg_class_reloptions 30 +#define Anum_pg_class_relallfrozen 12 +#define Anum_pg_class_reltoastrelid 13 +#define Anum_pg_class_relhasindex 14 +#define Anum_pg_class_relisshared 15 +#define Anum_pg_class_relpersistence 16 +#define Anum_pg_class_relkind 17 +#define Anum_pg_class_relnatts 18 +#define Anum_pg_class_relchecks 19 +#define Anum_pg_class_relhasoids 20 +#define Anum_pg_class_relhaspkey 21 +#define Anum_pg_class_relhasrules 22 +#define Anum_pg_class_relhastriggers 23 +#define Anum_pg_class_relhassubclass 24 +#define Anum_pg_class_relrowsecurity 25 +#define Anum_pg_class_relispopulated 26 +#define Anum_pg_class_relreplident 27 +#define Anum_pg_class_relfrozenxid 28 +#define Anum_pg_class_relminmxid 29 +#define Anum_pg_class_relacl 30 +#define Anum_pg_class_reloptions 31 /* ---------------- * initial contents of pg_class @@ -140,13 +143,13 @@ typedef FormData_pg_class *Form_pg_class; * Note: "3" in the relfrozenxid column stands for FirstNormalTransactionId; * similarly, "1" in relminmxid stands for FirstMultiXactId */ -DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1247 ( pg_type PGNSP 71 0 PGUID 0 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1249 ( pg_attribute PGNSP 75 0 PGUID 0 0 0 0 0 0 0 0 f f p r 21 0 f f f f f f t n 3 1 _null_ _null_ )); 
DESCR(""); -DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 f f p r 28 0 t f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1255 ( pg_proc PGNSP 81 0 PGUID 0 0 0 0 0 0 0 0 f f p r 28 0 t f f f f f t n 3 1 _null_ _null_ )); DESCR(""); -DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 f f p r 30 0 t f f f f f t n 3 1 _null_ _null_ )); +DATA(insert OID = 1259 ( pg_class PGNSP 83 0 PGUID 0 0 0 0 0 0 0 0 f f p r 31 0 t f f f f f t n 3 1 _null_ _null_ )); DESCR(""); diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 6fd1278..7d9b93f 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -3213,6 +3213,12 @@ DESCR("sleep until the specified time"); DATA(insert OID = 2971 ( text PGNSP PGUID 12 1 0 0 0 f f f f t f i 1 0 25 "16" _null_ _null_ _null_ _null_ _null_ booltext _null_ _null_ _null_ )); DESCR("convert boolean to text"); +DATA(insert OID = 3298 ( pg_is_all_visible PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 16 "2205 20" _null_ _null_ _null_ _null_ _null_ pg_is_all_visible _null_ _null_ _null_ )); +DESCR("true if the page is all visible"); +DATA(insert OID = 3299 ( pg_is_all_frozen PGNSP PGUID 12 1 0 0 0 f f f f t f v 2 0 16 "2205 20" _null_ _null_ _null_ _null_ _null_ pg_is_all_frozen _null_ _null_ _null_ )); +DESCR("true if the page is all frozen"); + + /* Aggregates (moved here from pg_aggregate for 7.3) */ DATA(insert OID = 2100 ( avg PGNSP PGUID 12 1 0 0 0 t f f f f f i 1 0 1700 "20" _null_ _null_ _null_ _null_ _null_ aggregate_dummy _null_ _null_ _null_ )); diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h index e3a31af..d2bae2d 100644 --- a/src/include/commands/vacuum.h +++ b/src/include/commands/vacuum.h @@ -172,6 +172,7 @@ extern void vac_update_relstats(Relation relation, BlockNumber num_pages, double num_tuples, BlockNumber num_all_visible_pages, + BlockNumber num_all_frozen_pages, bool hasindex, TransactionId frozenxid, MultiXactId minmulti, diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index a2f78ee..7bf2718 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -178,8 +178,10 @@ typedef PageHeaderData *PageHeader; * tuple? */ #define PD_ALL_VISIBLE 0x0004 /* all tuples on page are visible to * everyone */ +#define PD_ALL_FROZEN 0x0008 /* all tuples on page are completely + frozen */ -#define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */ +#define PD_VALID_FLAG_BITS 0x000F /* OR of all valid pd_flags bits */ /* * Page layout version number 0 is for pre-7.3 Postgres releases. @@ -369,6 +371,13 @@ typedef PageHeaderData *PageHeader; #define PageClearAllVisible(page) \ (((PageHeader) (page))->pd_flags &= ~PD_ALL_VISIBLE) +#define PageIsAllFrozen(page) \ + (((PageHeader) (page))->pd_flags & PD_ALL_FROZEN) +#define PageSetAllFrozen(page) \ + (((PageHeader) (page))->pd_flags |= PD_ALL_FROZEN) +#define PageClearAllFrozen(page) \ + (((PageHeader) (page))->pd_flags &= ~PD_ALL_FROZEN) + #define PageIsPrunable(page, oldestxmin) \ ( \ AssertMacro(TransactionIdIsNormal(oldestxmin)), \
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers