On Mon, Jan 28, 2013 at 07:24:04PM +0530, Pavan Deolasee wrote:
> On Wed, Jan 23, 2013 at 10:05 AM, Noah Misch <[email protected]> wrote:
>
> > You're the second commentator to be skittish about the patch's correctness,
> > so I won't argue against a conservatism-motivated bounce of the patch.
>
> Can you please rebase the patch against the latest head? I see
> Alvaro's and Simon's recent changes have bit-rotted the patch.
Attached.
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
***************
*** 5553,5584 **** HeapTupleHeaderAdvanceLatestRemovedXid(HeapTupleHeader tuple,
}
/*
- * Perform XLogInsert to register a heap cleanup info message. These
- * messages are sent once per VACUUM and are required because
- * of the phasing of removal operations during a lazy VACUUM.
- * see comments for vacuum_log_cleanup_info().
- */
- XLogRecPtr
- log_heap_cleanup_info(RelFileNode rnode, TransactionId latestRemovedXid)
- {
- xl_heap_cleanup_info xlrec;
- XLogRecPtr recptr;
- XLogRecData rdata;
-
- xlrec.node = rnode;
- xlrec.latestRemovedXid = latestRemovedXid;
-
- rdata.data = (char *) &xlrec;
- rdata.len = SizeOfHeapCleanupInfo;
- rdata.buffer = InvalidBuffer;
- rdata.next = NULL;
-
- recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_CLEANUP_INFO, &rdata);
-
- return recptr;
- }
-
- /*
* Perform XLogInsert for a heap-clean operation. Caller must already
* have modified the buffer and marked it dirty.
*
--- 5553,5558 ----
***************
*** 5930,5956 **** log_newpage_buffer(Buffer buffer)
}
/*
- * Handles CLEANUP_INFO
- */
- static void
- heap_xlog_cleanup_info(XLogRecPtr lsn, XLogRecord *record)
- {
- xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) XLogRecGetData(record);
-
- if (InHotStandby)
- ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid, xlrec->node);
-
- /*
- * Actual operation is a no-op. Record type exists to provide a means for
- * conflict processing to occur before we begin index vacuum actions. see
- * vacuumlazy.c and also comments in btvacuumpage()
- */
-
- /* Backup blocks are not used in cleanup_info records */
- Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
- }
-
- /*
* Handles HEAP2_CLEAN record type
*/
static void
--- 5904,5909 ----
***************
*** 7057,7065 **** heap2_redo(XLogRecPtr lsn, XLogRecord *record)
case XLOG_HEAP2_CLEAN:
heap_xlog_clean(lsn, record);
break;
- case XLOG_HEAP2_CLEANUP_INFO:
- heap_xlog_cleanup_info(lsn, record);
- break;
case XLOG_HEAP2_VISIBLE:
heap_xlog_visible(lsn, record);
break;
--- 7010,7015 ----
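
For reviewers tracing the recovery-conflict path: with CLEANUP_INFO gone,
conflict resolution on a standby for vacuumed heap tuples rests entirely on
XLOG_HEAP2_CLEAN replay. A paraphrased sketch of that surviving path in
heap_xlog_clean(), for orientation only (not part of the patch):

    static void
    heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
    {
        xl_heap_clean *xlrec = (xl_heap_clean *) XLogRecGetData(record);

        /*
         * Every prune record carries its own latestRemovedXid, so the
         * standby cancels conflicting snapshots here, before replaying any
         * index vacuuming that depends on these heap removals.
         */
        if (InHotStandby)
            ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
                                                xlrec->node);

        /* ... then redo the page changes themselves ... */
    }
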
*** a/src/backend/access/heap/pruneheap.c
--- b/src/backend/access/heap/pruneheap.c
***************
*** 121,133 **** heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
* have pruned while we hold pin.)
*/
if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
- {
- TransactionId ignore = InvalidTransactionId; /* return value not needed */
-
  /* OK to prune */
! (void) heap_page_prune(relation, buffer, OldestXmin, true, &ignore);
! }
/* And release buffer lock */
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
--- 121,128 ----
* have pruned while we hold pin.)
*/
if (PageIsFull(page) || PageGetHeapFreeSpace(page) < minfree)
/* OK to prune */
! (void) heap_page_prune(relation, buffer, OldestXmin, true);
/* And release buffer lock */
LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
***************
*** 148,159 **** heap_page_prune_opt(Relation relation, Buffer buffer, TransactionId OldestXmin)
* send its own new total to pgstats, and we don't want this delta applied
* on top of that.)
*
! * Returns the number of tuples deleted from the page and sets
! * latestRemovedXid.
*/
int
heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
! bool report_stats, TransactionId *latestRemovedXid)
{
int ndeleted = 0;
Page page = BufferGetPage(buffer);
--- 143,153 ----
* send its own new total to pgstats, and we don't want this delta applied
* on top of that.)
*
! * Returns the number of tuples deleted from the page.
*/
int
heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
! bool report_stats)
{
int ndeleted = 0;
Page page = BufferGetPage(buffer);
***************
*** 173,179 **** heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
* initialize the rest of our working state.
*/
prstate.new_prune_xid = InvalidTransactionId;
! prstate.latestRemovedXid = *latestRemovedXid;
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
memset(prstate.marked, 0, sizeof(prstate.marked));
--- 167,173 ----
* initialize the rest of our working state.
*/
prstate.new_prune_xid = InvalidTransactionId;
! prstate.latestRemovedXid = InvalidTransactionId;
prstate.nredirected = prstate.ndead = prstate.nunused = 0;
memset(prstate.marked, 0, sizeof(prstate.marked));
***************
*** 277,284 **** heap_page_prune(Relation relation, Buffer buffer, TransactionId OldestXmin,
if (report_stats && ndeleted > prstate.ndead)
pgstat_update_heap_dead_tuples(relation, ndeleted - prstate.ndead);
- *latestRemovedXid = prstate.latestRemovedXid;
-
/*
* XXX Should we update the FSM information of this page ?
*
--- 271,276 ----
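
The net effect on pruning callers is just the narrower signature. A
hypothetical call site, before and after:

    /* Before: the caller supplied, and got back, a latestRemovedXid. */
    TransactionId latestRemovedXid = InvalidTransactionId;

    ndeleted = heap_page_prune(relation, buffer, OldestXmin,
                               true, &latestRemovedXid);

    /* After: pruning tracks that xid internally, solely for its own
     * XLOG_HEAP2_CLEAN record. */
    ndeleted = heap_page_prune(relation, buffer, OldestXmin, true);
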
*** a/src/backend/access/nbtree/nbtxlog.c
--- b/src/backend/access/nbtree/nbtxlog.c
***************
*** 728,738 **** btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
* update the page.
*
* Btree delete records can conflict with standby queries. You might
! * think that vacuum records would conflict as well, but we've handled
! * that already. XLOG_HEAP2_CLEANUP_INFO records provide the highest xid
! * cleaned by the vacuum of the heap and so we can resolve any conflicts
! * just once when that arrives. After that we know that no conflicts
! * exist from individual btree vacuum records on that index.
*/
if (InHotStandby)
{
--- 728,736 ----
* update the page.
*
* Btree delete records can conflict with standby queries. You might
! * think that vacuum records would conflict as well. However, VACUUM
! * always removes the heap tuple first, and the XLOG_HEAP2_CLEAN record
! * from doing so is enough.
*/
if (InHotStandby)
{
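
The per-record conflict handling this comment now relies on is untouched; for
context, the body of that InHotStandby branch in btree_xlog_delete() is
roughly:

    TransactionId latestRemovedXid =
        btree_xlog_delete_get_latestRemovedXid(record);
    xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);

    ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node);
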
*** a/src/backend/access/rmgrdesc/heapdesc.c
--- b/src/backend/access/rmgrdesc/heapdesc.c
***************
*** 149,161 **** heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
xlrec->node.relNode,
xlrec->block,
xlrec->latestRemovedXid);
}
- else if (info == XLOG_HEAP2_CLEANUP_INFO)
- {
- xl_heap_cleanup_info *xlrec = (xl_heap_cleanup_info *) rec;
-
- appendStringInfo(buf, "cleanup info: remxid %u",
- xlrec->latestRemovedXid);
- }
else if (info == XLOG_HEAP2_VISIBLE)
{
xl_heap_visible *xlrec = (xl_heap_visible *) rec;
--- 149,154 ----
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 11,17 ****
* on the number of tuples and pages we will keep track of at once.
*
* We are willing to use at most maintenance_work_mem memory space to keep
! * track of dead tuples. We initially allocate an array of TIDs of that size,
* with an upper limit that depends on table size (this limit ensures we don't
* allocate a huge area uselessly for vacuuming small tables). If the array
* threatens to overflow, we suspend the heap scan phase and perform a pass of
--- 11,17 ----
* on the number of tuples and pages we will keep track of at once.
*
* We are willing to use at most maintenance_work_mem memory space to keep
! * track of dead TIDs. We initially allocate an array of TIDs of that size,
* with an upper limit that depends on table size (this limit ensures we don't
* allocate a huge area uselessly for vacuuming small tables). If the array
* threatens to overflow, we suspend the heap scan phase and perform a pass of
***************
*** 19,25 ****
* TID array.
*
* If we're processing a table with no indexes, we can just vacuum each page
! * as we go; there's no need to save up multiple tuples to minimize the number
* of index scans performed. So we don't use maintenance_work_mem memory for
* the TID array, just enough to hold as many heap tuples as fit on one page.
*
--- 19,25 ----
* TID array.
*
* If we're processing a table with no indexes, we can just vacuum each page
! * as we go; there's no need to save up many TIDs to minimize the number
* of index scans performed. So we don't use maintenance_work_mem memory for
* the TID array, just enough to hold as many heap tuples as fit on one page.
*
***************
*** 109,121 **** typedef struct LVRelStats
BlockNumber pages_removed;
double tuples_deleted;
BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
! /* List of TIDs of tuples we intend to delete */
! /* NB: this list is ordered by TID address */
int num_dead_tuples; /* current # of entries */
int max_dead_tuples; /* # slots allocated in array */
ItemPointer dead_tuples; /* array of ItemPointerData */
int num_index_scans;
- TransactionId latestRemovedXid;
bool lock_waiter_detected;
} LVRelStats;
--- 109,119 ----
BlockNumber pages_removed;
double tuples_deleted;
BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
! /* Ordered list of TIDs we intend to delete */
int num_dead_tuples; /* current # of entries */
int max_dead_tuples; /* # slots allocated in array */
ItemPointer dead_tuples; /* array of ItemPointerData */
int num_index_scans;
bool lock_waiter_detected;
} LVRelStats;
***************
*** 340,388 **** lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
}
/*
- * For Hot Standby we need to know the highest transaction id that will
- * be removed by any change. VACUUM proceeds in a number of passes so
- * we need to consider how each pass operates. The first phase runs
- * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it
- * progresses - these will have a latestRemovedXid on each record.
- * In some cases this removes all of the tuples to be removed, though
- * often we have dead tuples with index pointers so we must remember them
- * for removal in phase 3. Index records for those rows are removed
- * in phase 2 and index blocks do not have MVCC information attached.
- * So before we can allow removal of any index tuples we need to issue
- * a WAL record containing the latestRemovedXid of rows that will be
- * removed in phase three. This allows recovery queries to block at the
- * correct place, i.e. before phase two, rather than during phase three
- * which would be after the rows have become inaccessible.
- */
- static void
- vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats)
- {
- /*
- * Skip this for relations for which no WAL is to be written, or if we're
- * not trying to support archive recovery.
- */
- if (!RelationNeedsWAL(rel) || !XLogIsNeeded())
- return;
-
- /*
- * No need to write the record at all unless it contains a valid value
- */
- if (TransactionIdIsValid(vacrelstats->latestRemovedXid))
- (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid);
- }
-
- /*
* lazy_scan_heap() -- scan an open heap relation
*
* This routine prunes each page in the heap, which will among other
* things truncate dead tuples to dead line pointers, defragment the
! * page, and set commit status bits (see heap_page_prune). It also builds
! * lists of dead tuples and pages with free space, calculates statistics
! * on the number of live tuples in the heap, and marks pages as
! * all-visible if appropriate. When done, or when we run low on space for
! * dead-tuple TIDs, invoke vacuuming of indexes and call lazy_vacuum_heap
! * to reclaim dead line pointers.
*
* If there are no indexes then we can reclaim line pointers on the fly;
* dead line pointers need only be retained until all index pointers that
--- 338,353 ----
}
/*
* lazy_scan_heap() -- scan an open heap relation
*
* This routine prunes each page in the heap, which will among other
* things truncate dead tuples to dead line pointers, defragment the
! * page, and set commit status bits (see heap_page_prune). It then
! * builds lists of dead line pointers, tracks feasibility of truncating
! * the end of the heap, calculates statistics on the number of live
! * tuples, and marks pages as all-visible if appropriate. When done, or
! * when we run low on space for dead-tuple TIDs, invoke vacuuming of
! * indexes and call lazy_vacuum_heap to reclaim dead line pointers.
*
* If there are no indexes then we can reclaim line pointers on the fly;
* dead line pointers need only be retained until all index pointers that
***************
*** 398,407 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
char *relname;
BlockNumber empty_pages,
vacuumed_pages;
! double num_tuples,
! tups_vacuumed,
! nkeep,
! nunused;
IndexBulkDeleteResult **indstats;
int i;
PGRUsage ru0;
--- 363,372 ----
char *relname;
BlockNumber empty_pages,
vacuumed_pages;
! double num_tuples, /* kept: any reason */
! tups_vacuumed, /* truncated to LP_DEAD */
! nkeep, /* kept: writer finished too recently */
! nunused; /* already LP_UNUSED */
IndexBulkDeleteResult **indstats;
int i;
PGRUsage ru0;
***************
*** 427,433 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
vacrelstats->rel_pages = nblocks;
vacrelstats->scanned_pages = 0;
vacrelstats->nonempty_pages = 0;
- vacrelstats->latestRemovedXid = InvalidTransactionId;
lazy_space_alloc(vacrelstats, nblocks);
--- 392,397 ----
***************
*** 481,495 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
Page page;
OffsetNumber offnum,
maxoff;
! bool tupgone,
! hastup;
! int prev_dead_count;
OffsetNumber frozen[MaxOffsetNumber];
int nfrozen;
Size freespace;
bool all_visible_according_to_vm;
- bool all_visible;
- bool has_dead_tuples;
TransactionId visibility_cutoff_xid = InvalidTransactionId;
if (blkno == next_not_all_visible_block)
--- 445,460 ----
Page page;
OffsetNumber offnum,
maxoff;
! /* Page has items !LP_UNUSED && !LP_DEAD? */
! bool hastup,
! /* Page has items !LP_UNUSED && !(LP_NORMAL && HEAPTUPLE_LIVE)? */
! has_nonlive,
! /* Page has live tuples too new to be visible to some snapshot? */
! has_too_recent;
OffsetNumber frozen[MaxOffsetNumber];
int nfrozen;
Size freespace;
bool all_visible_according_to_vm;
TransactionId visibility_cutoff_xid = InvalidTransactionId;
if (blkno == next_not_all_visible_block)
***************
*** 545,566 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
vmbuffer = InvalidBuffer;
}
- /* Log cleanup info before we touch indexes */
- vacuum_log_cleanup_info(onerel, vacrelstats);
-
/* Remove index entries */
for (i = 0; i < nindexes; i++)
lazy_vacuum_index(Irel[i],
&indstats[i],
vacrelstats);
! /* Remove tuples from heap */
lazy_vacuum_heap(onerel, vacrelstats);
! /*
! * Forget the now-vacuumed tuples, and press on, but be careful
! * not to reset latestRemovedXid since we want that value to be
! * valid.
! */
vacrelstats->num_dead_tuples = 0;
vacrelstats->num_index_scans++;
}
--- 510,524 ----
vmbuffer = InvalidBuffer;
}
/* Remove index entries */
for (i = 0; i < nindexes; i++)
lazy_vacuum_index(Irel[i],
&indstats[i],
vacrelstats);
! /* Mark dead line pointers unused */
lazy_vacuum_heap(onerel, vacrelstats);
! /* Forget the now-vacuumed TIDs, and press on */
vacrelstats->num_dead_tuples = 0;
vacrelstats->num_index_scans++;
}
***************
*** 578,584 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
RBM_NORMAL, vac_strategy);
! /* We need buffer cleanup lock so that we can prune HOT chains. */
if (!ConditionalLockBufferForCleanup(buf))
{
/*
--- 536,542 ----
buf = ReadBufferExtended(onerel, MAIN_FORKNUM, blkno,
RBM_NORMAL, vac_strategy);
! /* We need buffer cleanup lock so that we can prune. */
if (!ConditionalLockBufferForCleanup(buf))
{
/*
***************
*** 680,707 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
}
/*
! * Prune all HOT-update chains in this page.
! *
! * We count tuples removed by the pruning step as removed by VACUUM.
*/
! tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false,
! &vacrelstats->latestRemovedXid);
/*
* Now scan the page to collect vacuumable items and check for tuples
* requiring freezing.
*/
- all_visible = true;
- has_dead_tuples = false;
nfrozen = 0;
hastup = false;
! prev_dead_count = vacrelstats->num_dead_tuples;
maxoff = PageGetMaxOffsetNumber(page);
for (offnum = FirstOffsetNumber;
offnum <= maxoff;
offnum = OffsetNumberNext(offnum))
{
ItemId itemid;
itemid = PageGetItemId(page, offnum);
--- 638,666 ----
}
/*
! * Remove dead tuples in this page. Certain heap-only tuples will be
! * reduced straight to LP_UNUSED line pointers. Other tuples,
! * including all index-referenced tuples, will be reduced to LP_DEAD
! * with no storage. We will finish the job by removing the index
! * entries and changing them to LP_UNUSED.
*/
! tups_vacuumed += heap_page_prune(onerel, buf, OldestXmin, false);
/*
* Now scan the page to collect vacuumable items and check for tuples
* requiring freezing.
*/
nfrozen = 0;
hastup = false;
! has_nonlive = false;
! has_too_recent = false;
maxoff = PageGetMaxOffsetNumber(page);
for (offnum = FirstOffsetNumber;
offnum <= maxoff;
offnum = OffsetNumberNext(offnum))
{
ItemId itemid;
+ bool try_freeze = true;
itemid = PageGetItemId(page, offnum);
***************
*** 725,746 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* DEAD item pointers are to be vacuumed normally; but we don't
* count them in tups_vacuumed, else we'd be double-counting (at
* least in the common case where heap_page_prune() just freed up
! * a non-HOT tuple).
*/
if (ItemIdIsDead(itemid))
{
lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
! all_visible = false;
continue;
}
Assert(ItemIdIsNormal(itemid));
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
- tupgone = false;
-
switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
{
case HEAPTUPLE_DEAD:
--- 684,706 ----
* DEAD item pointers are to be vacuumed normally; but we don't
* count them in tups_vacuumed, else we'd be double-counting (at
* least in the common case where heap_page_prune() just freed up
! * a tuple).
*/
if (ItemIdIsDead(itemid))
{
lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
! has_nonlive = true;
continue;
}
Assert(ItemIdIsNormal(itemid));
+ num_tuples += 1;
+ hastup = true;
+
tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
tuple.t_len = ItemIdGetLength(itemid);
switch (HeapTupleSatisfiesVacuum(tuple.t_data, OldestXmin, buf))
{
case HEAPTUPLE_DEAD:
***************
*** 751,771 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* state changed since heap_page_prune() looked. In
* particular an INSERT_IN_PROGRESS tuple could have
* changed to DEAD if the inserter aborted. So this
! * cannot be considered an error condition.
*
! * If the tuple is HOT-updated then it must only be
! * removed by a prune operation; so we keep it just as if
! * it were RECENTLY_DEAD. Also, if it's a heap-only
! * tuple, we choose to keep it, because it'll be a lot
! * cheaper to get rid of it in the next pruning pass than
! * to treat it like an indexed tuple.
*/
! if (HeapTupleIsHotUpdated(&tuple) ||
! HeapTupleIsHeapOnly(&tuple))
! nkeep += 1;
! else
! tupgone = true; /* we can delete the tuple */
! all_visible = false;
break;
case HEAPTUPLE_LIVE:
/* Tuple is good --- but let's do some validity checks */
--- 711,735 ----
* state changed since heap_page_prune() looked. In
* particular an INSERT_IN_PROGRESS tuple could have
* changed to DEAD if the inserter aborted. So this
! * cannot be considered an error condition, and it's not
! * worth the code to actually kill the tuple here.
*
! * heap_freeze_tuple() can't cope with HEAPTUPLE_DEAD.
! * Since our OldestXmin predates the inserter's abort,
! * none of the tuple's XIDs should qualify for freezing.
! * For this rarely-tested branch, burn a few cycles
! * verifying that at runtime.
! *
! * In all other respects, treat this outcome just like
! * HEAPTUPLE_RECENTLY_DEAD.
*/
! if (heap_tuple_needs_freeze(tuple.t_data, FreezeLimit,
! MultiXactFrzLimit, buf))
! elog(ERROR, "just-dead tuple requires freezing");
! try_freeze = false;
!
! nkeep += 1;
! has_nonlive = true;
break;
case HEAPTUPLE_LIVE:
/* Tuple is good --- but let's do some validity checks */
***************
*** 783,795 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* that the HEAP_XMIN_COMMITTED hint bit is set because of
* that.
*/
! if (all_visible)
{
TransactionId xmin;
if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
{
! all_visible = false;
break;
}
--- 747,759 ----
* that the HEAP_XMIN_COMMITTED hint bit is set because of
* that.
*/
! if (!has_too_recent)
{
TransactionId xmin;
if (!(tuple.t_data->t_infomask & HEAP_XMIN_COMMITTED))
{
! has_too_recent = true;
break;
}
***************
*** 800,806 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
xmin = HeapTupleHeaderGetXmin(tuple.t_data);
if (!TransactionIdPrecedes(xmin, OldestXmin))
{
! all_visible = false;
break;
}
--- 764,770 ----
xmin = HeapTupleHeaderGetXmin(tuple.t_data);
if (!TransactionIdPrecedes(xmin, OldestXmin))
{
! has_too_recent = true;
break;
}
***************
*** 816,857 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* from relation.
*/
nkeep += 1;
! all_visible = false;
break;
case HEAPTUPLE_INSERT_IN_PROGRESS:
/* This is an expected case during concurrent vacuum */
! all_visible = false;
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
/* This is an expected case during concurrent vacuum */
! all_visible = false;
break;
default:
elog(ERROR, "unexpected
HeapTupleSatisfiesVacuum result");
break;
}
! if (tupgone)
! {
! lazy_record_dead_tuple(vacrelstats, &(tuple.t_self));
! HeapTupleHeaderAdvanceLatestRemovedXid(tuple.t_data,
! &vacrelstats->latestRemovedXid);
! tups_vacuumed += 1;
! has_dead_tuples = true;
! }
! else
! {
! num_tuples += 1;
! hastup = true;
!
! /*
! * Each non-removable tuple must be checked to see if it needs
! * freezing. Note we already have exclusive buffer lock.
! */
! if (heap_freeze_tuple(tuple.t_data, FreezeLimit,
! MultiXactFrzLimit))
! frozen[nfrozen++] = offnum;
! }
} /* scan along page */
/*
--- 780,808 ----
* from relation.
*/
nkeep += 1;
! has_nonlive = true;
break;
case HEAPTUPLE_INSERT_IN_PROGRESS:
/* This is an expected case during concurrent vacuum */
! has_nonlive = true;
break;
case HEAPTUPLE_DELETE_IN_PROGRESS:
/* This is an expected case during concurrent vacuum */
! has_nonlive = true;
break;
default:
elog(ERROR, "unexpected
HeapTupleSatisfiesVacuum result");
break;
}
! /*
! * Apart from the case(s) handled specially above, each
! * non-removable tuple must be checked to see if it needs
! * freezing. Note we already have exclusive buffer lock.
! */
! if (try_freeze && heap_freeze_tuple(tuple.t_data, FreezeLimit,
! MultiXactFrzLimit))
! frozen[nfrozen++] = offnum;
} /* scan along page */
/*
***************
*** 880,893 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
if (nindexes == 0 &&
vacrelstats->num_dead_tuples > 0)
{
! /* Remove tuples from heap */
lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);
! /*
! * Forget the now-vacuumed tuples, and press on, but be careful
! * not to reset latestRemovedXid since we want that value to be
! * valid.
! */
vacrelstats->num_dead_tuples = 0;
vacuumed_pages++;
}
--- 831,840 ----
if (nindexes == 0 &&
vacrelstats->num_dead_tuples > 0)
{
! /* Mark dead line pointers unused */
lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats);
! /* Forget the now-vacuumed TIDs, and press on */
vacrelstats->num_dead_tuples = 0;
vacuumed_pages++;
}
***************
*** 895,901 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
freespace = PageGetHeapFreeSpace(page);
/* mark page all-visible, if appropriate */
! if (all_visible)
{
if (!PageIsAllVisible(page))
{
--- 842,848 ----
freespace = PageGetHeapFreeSpace(page);
/* mark page all-visible, if appropriate */
! if (!has_nonlive && !has_too_recent)
{
if (!PageIsAllVisible(page))
{
***************
*** 942,951 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
* means that the tuples that we think are not visible to everyone yet
* actually are, and the PD_ALL_VISIBLE flag is correct.
*
! * There should never be dead tuples on a page with PD_ALL_VISIBLE
! * set, however.
*/
! else if (PageIsAllVisible(page) && has_dead_tuples)
{
elog(WARNING, "page containing dead tuples is marked as
all-visible in relation \"%s\" page %u",
relname, blkno);
--- 889,897 ----
* means that the tuples that we think are not visible to everyone yet
* actually are, and the PD_ALL_VISIBLE flag is correct.
*
! * A PD_ALL_VISIBLE page should, however, carry only live tuples.
*/
! else if (PageIsAllVisible(page) && has_nonlive)
{
elog(WARNING, "page containing dead tuples is marked as
all-visible in relation \"%s\" page %u",
relname, blkno);
***************
*** 960,974 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
if (hastup)
vacrelstats->nonempty_pages = blkno + 1;
! /*
! * If we remembered any tuples for deletion, then the page will be
! * visited again by lazy_vacuum_heap, which will compute and record
! * its post-compaction free space. If not, then we're done with this
! * page, so remember its free space as-is. (This path will always be
! * taken if there are no indexes.)
! */
! if (vacrelstats->num_dead_tuples == prev_dead_count)
! RecordPageWithFreeSpace(onerel, blkno, freespace);
}
/* save stats for use later */
--- 906,912 ----
if (hastup)
vacrelstats->nonempty_pages = blkno + 1;
! RecordPageWithFreeSpace(onerel, blkno, freespace);
}
/* save stats for use later */
***************
*** 990,1008 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
vmbuffer = InvalidBuffer;
}
! /* If any tuples need to be deleted, perform final vacuum cycle */
! /* XXX put a threshold on min number of tuples here? */
if (vacrelstats->num_dead_tuples > 0)
{
- /* Log cleanup info before we touch indexes */
- vacuum_log_cleanup_info(onerel, vacrelstats);
-
/* Remove index entries */
for (i = 0; i < nindexes; i++)
lazy_vacuum_index(Irel[i],
&indstats[i],
vacrelstats);
! /* Remove tuples from heap */
lazy_vacuum_heap(onerel, vacrelstats);
vacrelstats->num_index_scans++;
}
--- 928,943 ----
vmbuffer = InvalidBuffer;
}
! /* If any line pointers need to be deleted, perform final vacuum cycle */
! /* XXX put a threshold on minimum count here? */
if (vacrelstats->num_dead_tuples > 0)
{
/* Remove index entries */
for (i = 0; i < nindexes; i++)
lazy_vacuum_index(Irel[i],
&indstats[i],
vacrelstats);
! /* Mark dead line pointers unused */
lazy_vacuum_heap(onerel, vacrelstats);
vacrelstats->num_index_scans++;
}
***************
*** 1037,1048 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
/*
* lazy_vacuum_heap() -- second pass over the heap
*
! * This routine marks dead tuples as unused and compacts out free
! * space on their pages. Pages not having dead tuples recorded from
! * lazy_scan_heap are not visited at all.
*
! * Note: the reason for doing this as a second pass is we cannot remove
! * the tuples until we've removed their index entries, and we want to
* process index entry removal in batches as large as possible.
*/
static void
--- 972,982 ----
/*
* lazy_vacuum_heap() -- second pass over the heap
*
! * This routine marks dead line pointers as unused. Pages not recorded
! * during lazy_scan_heap() are not visited at all.
*
! * Note: the reason for doing this as a second pass is we cannot reuse the
! * line pointers until we've removed their index entries, and we want to
* process index entry removal in batches as large as possible.
*/
static void
***************
*** 1060,1087 **** lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
{
BlockNumber tblk;
Buffer buf;
- Page page;
- Size freespace;
vacuum_delay_point();
tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
vac_strategy);
! if (!ConditionalLockBufferForCleanup(buf))
! {
! ReleaseBuffer(buf);
! ++tupindex;
! continue;
! }
tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex,
vacrelstats);
-
- /* Now that we've compacted the page, record its available space */
- page = BufferGetPage(buf);
- freespace = PageGetHeapFreeSpace(page);
-
UnlockReleaseBuffer(buf);
- RecordPageWithFreeSpace(onerel, tblk, freespace);
npages++;
}
--- 994,1008 ----
{
BlockNumber tblk;
Buffer buf;
vacuum_delay_point();
tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL,
vac_strategy);
! LockBuffer(buf, BUFFER_LOCK_SHARE);
tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex,
vacrelstats);
UnlockReleaseBuffer(buf);
npages++;
}
***************
*** 1094,1117 **** lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
}
/*
! * lazy_vacuum_page() -- free dead tuples on a page
! * and repair its fragmentation.
*
! * Caller must hold pin and buffer cleanup lock on the buffer.
*
! * tupindex is the index in vacrelstats->dead_tuples of the first dead
! * tuple for this page. We assume the rest follow sequentially.
! * The return value is the first tupindex after the tuples of this page.
*/
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
int tupindex, LVRelStats *vacrelstats)
{
Page page = BufferGetPage(buffer);
- OffsetNumber unused[MaxOffsetNumber];
- int uncnt = 0;
-
- START_CRIT_SECTION();
for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
{
--- 1015,1045 ----
}
/*
! * lazy_vacuum_page() -- change LP_DEAD line pointers to LP_UNUSED.
! *
! * Caller must hold at least a shared lock on the buffer. Only VACUUM
! * modifies heap LP_DEAD line pointers, and no other process will be upset if
! * such a line pointer suddenly becomes LP_UNUSED. The primary task noticing
! * the difference is PageAddItem(), and it would only mind a switch in the
! * opposite direction, from LP_UNUSED to LP_DEAD. (Furthermore, it needs an
! * exclusive lock on the buffer.)
*
! * Emit no WAL. The next VACUUM will redo lost changes. A mere LP_DEAD
! * pointer imposes no wraparound hazard; all XIDs are gone. However, WAL
! * replay of operations that repurpose LP_UNUSED pointers must be prepared to
! * find the rare LP_DEAD pointer until recovery reaches a consistent state.
! * That can happen when such a WAL record makes it to disk, but the crash
! * precedes any flush of this buffer.
*
! * tupindex is the index in vacrelstats->dead_tuples of the first dead TID for
! * this page. We assume the rest follow sequentially. The return value is
! * the first tupindex beyond this page.
*/
static int
lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
int tupindex, LVRelStats *vacrelstats)
{
Page page = BufferGetPage(buffer);
for (; tupindex < vacrelstats->num_dead_tuples; tupindex++)
{
***************
*** 1124,1151 **** lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
break; /* past end of tuples for this block */
toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
itemid = PageGetItemId(page, toff);
ItemIdSetUnused(itemid);
- unused[uncnt++] = toff;
}
! PageRepairFragmentation(page);
!
! MarkBufferDirty(buffer);
!
! /* XLOG stuff */
! if (RelationNeedsWAL(onerel))
! {
! XLogRecPtr recptr;
!
! recptr = log_heap_clean(onerel, buffer,
! NULL, 0, NULL, 0,
! unused, uncnt,
! vacrelstats->latestRemovedXid);
! PageSetLSN(page, recptr);
! PageSetTLI(page, ThisTimeLineID);
! }
! END_CRIT_SECTION();
return tupindex;
}
--- 1052,1065 ----
break; /* past end of tuples for this block */
toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]);
itemid = PageGetItemId(page, toff);
+ Assert(ItemIdIsDead(itemid)); /* stronger lock needed otherwise */
ItemIdSetUnused(itemid);
}
! /* This is a pure hint; there's no harm if it arrives early or never. */
! PageSetHasFreeLinePointers(page);
! SetBufferCommitInfoNeedsSave(buffer);
return tupindex;
}
***************
*** 1198,1204 **** lazy_check_needs_freeze(Buffer buf)
/*
* lazy_vacuum_index() -- vacuum one index relation.
*
! * Delete all the index entries pointing to tuples listed in
* vacrelstats->dead_tuples, and update running statistics.
*/
static void
--- 1112,1118 ----
/*
* lazy_vacuum_index() -- vacuum one index relation.
*
! * Delete all the index entries pointing to items listed in
* vacrelstats->dead_tuples, and update running statistics.
*/
static void
***************
*** 1503,1511 **** count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
/*
* Note: any non-unused item should be taken as a reason to keep
! * this page. We formerly thought that DEAD tuples could be
! * thrown away, but that's not so, because we'd not have cleaned
! * out their index entries.
*/
if (ItemIdIsUsed(itemid))
{
--- 1417,1425 ----
/*
* Note: any non-unused item should be taken as a
reason to keep
! * this page. We formerly thought that LP_DEAD line
pointers
! * could be thrown away, but that's not so, because
we'd not have
! * cleaned out their index entries.
*/
if (ItemIdIsUsed(itemid))
{
***************
*** 1564,1570 **** lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks)
}
/*
! * lazy_record_dead_tuple - remember one deletable tuple
*/
static void
lazy_record_dead_tuple(LVRelStats *vacrelstats,
--- 1478,1484 ----
}
/*
! * lazy_record_dead_tuple - remember one reclaimable line pointer
*/
static void
lazy_record_dead_tuple(LVRelStats *vacrelstats,
***************
*** 1573,1579 **** lazy_record_dead_tuple(LVRelStats *vacrelstats,
/*
* The array shouldn't overflow under normal behavior, but perhaps it
* could if we are given a really small maintenance_work_mem. In that
! * case, just forget the last few tuples (we'll get 'em next time).
*/
if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
{
--- 1487,1493 ----
/*
* The array shouldn't overflow under normal behavior, but perhaps it
* could if we are given a really small maintenance_work_mem. In that
! * case, just forget the last few entries (we'll get 'em next time).
*/
if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples)
{
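
On why the shared-lock downgrade in lazy_vacuum_page() is invisible to
concurrent readers: readers dereference only LP_NORMAL items. A hypothetical
reader-side fragment, in the style of heap_fetch(), to illustrate the
argument:

    ItemId itemid = PageGetItemId(page, offnum);

    /*
     * LP_DEAD and LP_UNUSED both mean "no tuple here"; a reader never
     * inspects lp_off/lp_len for either state, so a concurrent
     * LP_DEAD -> LP_UNUSED flip cannot change what it does with this TID.
     */
    if (!ItemIdIsNormal(itemid))
        return false;           /* no visible tuple at this TID */
    tuple->t_data = (HeapTupleHeader) PageGetItem(page, itemid);
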
*** a/src/backend/storage/page/bufpage.c
--- b/src/backend/storage/page/bufpage.c
***************
*** 15,20 ****
--- 15,21 ----
#include "postgres.h"
#include "access/htup_details.h"
+ #include "access/xlog.h"
/* ----------------------------------------------------------------
***************
*** 156,163 **** PageAddItem(Page page,
if (offsetNumber < limit)
{
itemId = PageGetItemId(phdr, offsetNumber);
! if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
{
elog(WARNING, "will not overwrite a used ItemId");
return InvalidOffsetNumber;
}
--- 157,175 ----
if (offsetNumber < limit)
{
itemId = PageGetItemId(phdr, offsetNumber);
! if (InRecovery && !reachedConsistency &&
! ItemIdIsDead(itemId) && !ItemIdHasStorage(itemId))
{
+ /*
+ * Before recovering to a consistent state, it is possible
+ * to find LP_DEAD items where we expect LP_UNUSED. See
+ * comments at lazy_vacuum_page().
+ */
+ elog(WARNING, "overwriting dead ItemId");
+ }
+ else if (ItemIdIsUsed(itemId) || ItemIdHasStorage(itemId))
+ {
+ /* Be strict later on. */
+ elog(WARNING, "will not overwrite a used ItemId");
+ return InvalidOffsetNumber;
}
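
To spell out the window this relaxed check covers, the crash timeline implied
by the lazy_vacuum_page() comments, written out as comments:

    /*
     * 1. VACUUM flips an item LP_DEAD -> LP_UNUSED with no WAL; the buffer
     *    is dirtied but not yet flushed.
     * 2. An inserter reuses the now-unused slot; its WAL record reaches
     *    disk.
     * 3. Crash.  The on-disk page still shows LP_DEAD at that offset.
     * 4. Redo of the insert calls PageAddItem() there and finds LP_DEAD
     *    where it expects LP_UNUSED.  Before reachedConsistency that is
     *    harmless, so warn and overwrite.
     */
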
*** a/src/include/access/heapam.h
--- b/src/include/access/heapam.h
***************
*** 166,172 **** extern void heap_page_prune_opt(Relation relation, Buffer buffer,
TransactionId OldestXmin);
extern int heap_page_prune(Relation relation, Buffer buffer,
TransactionId OldestXmin,
! bool report_stats, TransactionId *latestRemovedXid);
extern void heap_page_prune_execute(Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
--- 166,172 ----
TransactionId OldestXmin);
extern int heap_page_prune(Relation relation, Buffer buffer,
TransactionId OldestXmin,
! bool report_stats);
extern void heap_page_prune_execute(Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
*** a/src/include/access/heapam_xlog.h
--- b/src/include/access/heapam_xlog.h
***************
*** 51,57 ****
#define XLOG_HEAP2_FREEZE 0x00
#define XLOG_HEAP2_CLEAN 0x10
/* 0x20 is free, was XLOG_HEAP2_CLEAN_MOVE */
! #define XLOG_HEAP2_CLEANUP_INFO 0x30
#define XLOG_HEAP2_VISIBLE 0x40
#define XLOG_HEAP2_MULTI_INSERT 0x50
#define XLOG_HEAP2_LOCK_UPDATED 0x60
--- 51,57 ----
#define XLOG_HEAP2_FREEZE 0x00
#define XLOG_HEAP2_CLEAN 0x10
/* 0x20 is free, was XLOG_HEAP2_CLEAN_MOVE */
! /* 0x30 is free, was XLOG_HEAP2_CLEANUP_INFO */
#define XLOG_HEAP2_VISIBLE 0x40
#define XLOG_HEAP2_MULTI_INSERT 0x50
#define XLOG_HEAP2_LOCK_UPDATED 0x60
***************
*** 178,196 **** typedef struct xl_heap_clean
#define SizeOfHeapClean (offsetof(xl_heap_clean, ndead) + sizeof(uint16))
- /*
- * Cleanup_info is required in some cases during a lazy VACUUM.
- * Used for reporting the results of HeapTupleHeaderAdvanceLatestRemovedXid()
- * see vacuumlazy.c for full explanation
- */
- typedef struct xl_heap_cleanup_info
- {
- RelFileNode node;
- TransactionId latestRemovedXid;
- } xl_heap_cleanup_info;
-
- #define SizeOfHeapCleanupInfo (sizeof(xl_heap_cleanup_info))
-
/* This is for replacing a page's contents in toto */
/* NB: this is used for indexes as well as heaps */
typedef struct xl_heap_newpage
--- 178,183 ----
***************
*** 269,276 **** extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec);
extern void heap2_redo(XLogRecPtr lsn, XLogRecord *rptr);
extern void heap2_desc(StringInfo buf, uint8 xl_info, char *rec);
- extern XLogRecPtr log_heap_cleanup_info(RelFileNode rnode,
- TransactionId latestRemovedXid);
extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
OffsetNumber *redirected, int nredirected,
OffsetNumber *nowdead, int ndead,
--- 256,261 ----
*** a/src/include/storage/bufpage.h
--- b/src/include/storage/bufpage.h
***************
*** 164,169 **** typedef PageHeaderData *PageHeader;
--- 164,172 ----
* PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
* page for its new tuple version; this suggests that a prune is needed.
* Again, this is just a hint.
+ *
+ * Hold at least a shared lock to change the hints. Critical flags, currently
+ * PD_ALL_VISIBLE, require an exclusive lock.
*/
#define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */
#define PD_PAGE_FULL 0x0002 /* not enough free space for new
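
Restated as a usage contrast (hypothetical fragments, not quoted from the
tree):

    /* Hint flag: a shared lock suffices, as lazy_vacuum_page() now relies on. */
    PageSetHasFreeLinePointers(page);
    SetBufferCommitInfoNeedsSave(buffer);

    /* Critical flag: take an exclusive lock before changing it. */
    PageSetAllVisible(page);
    MarkBufferDirty(buffer);
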
*** a/src/include/storage/itemid.h
--- b/src/include/storage/itemid.h
***************
*** 123,128 **** typedef uint16 ItemLength;
--- 123,131 ----
* ItemIdSetUnused
* Set the item identifier to be UNUSED, with no storage.
* Beware of multiple evaluations of itemId!
+ *
+ * Note: VACUUM uses this on LP_DEAD heap items as though it were a hint-bit
+ * mechanism, holding only a shared lock.
*/
#define ItemIdSetUnused(itemId) \
( \
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers