Here's a patch that WAL-logs tuple freezes in vacuum, per discussion
on pgsql-bugs.

This patch is against CVS head. Should this be backported to stable
branches? I think it should.

After writing the patch, I realized that backporting it needs some thought, because the WAL records for removing tuples and for freezing tuples share the same heapam opcode, XLOG_HEAP_CLEAN, and are differentiated only by a flag. If we applied the patch as it is, and for some reason someone replayed WAL generated by a newer version (with the patch) on an older version (without the patch), the older version would interpret the freeze WAL records as dead-tuple removals and remove live tuples. I would have liked to give freezing a new opcode, but we've run out of them (see htup.h).

--
   Heikki Linnakangas
   EnterpriseDB   http://www.enterprisedb.com

Index: src/backend/access/heap/heapam.c
===================================================================
RCS file: 
/home/hlinnaka/pgcvsrepository/pgsql/src/backend/access/heap/heapam.c,v
retrieving revision 1.220
diff -c -r1.220 heapam.c
*** src/backend/access/heap/heapam.c    4 Oct 2006 00:29:48 -0000       1.220
--- src/backend/access/heap/heapam.c    23 Oct 2006 18:17:17 -0000
***************
*** 2877,2889 ****
  /*
   * Perform XLogInsert for a heap-clean operation.  Caller must already
   * have modified the buffer and marked it dirty.
   */
  XLogRecPtr
! log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt)
  {
        xl_heap_clean xlrec;
        XLogRecPtr      recptr;
        XLogRecData rdata[2];
  
        /* Caller should not call me on a temp relation */
        Assert(!reln->rd_istemp);
--- 2877,2895 ----
  /*
   * Perform XLogInsert for a heap-clean operation.  Caller must already
   * have modified the buffer and marked it dirty.
+  *
+  * If freeze is true, the tuples specified in offsets array were frozen,
+  * otherwise they were dead and removed.
   */
  XLogRecPtr
! log_heap_clean(Relation reln, Buffer buffer, 
!                          OffsetNumber *offsets, int noffsets, bool freeze)
  {
        xl_heap_clean xlrec;
        XLogRecPtr      recptr;
        XLogRecData rdata[2];
+       uint8           info = freeze ? 
+               (XLOG_HEAP_CLEAN | XLOG_HEAP_FREEZE) : XLOG_HEAP_CLEAN;
  
        /* Caller should not call me on a temp relation */
        Assert(!reln->rd_istemp);
***************
*** 2901,2910 ****
         * that it is.  When XLogInsert stores the whole buffer, the offsets 
array
         * need not be stored too.
         */
!       if (uncnt > 0)
        {
!               rdata[1].data = (char *) unused;
!               rdata[1].len = uncnt * sizeof(OffsetNumber);
        }
        else
        {
--- 2907,2916 ----
         * that it is.  When XLogInsert stores the whole buffer, the offsets 
array
         * need not be stored too.
         */
!       if (noffsets > 0)
        {
!               rdata[1].data = (char *) offsets;
!               rdata[1].len = noffsets * sizeof(OffsetNumber);
        }
        else
        {
***************
*** 2915,2921 ****
        rdata[1].buffer_std = true;
        rdata[1].next = NULL;
  
!       recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_CLEAN, rdata);
  
        return recptr;
  }
--- 2921,2927 ----
        rdata[1].buffer_std = true;
        rdata[1].next = NULL;
  
!       recptr = XLogInsert(RM_HEAP_ID, info, rdata);
  
        return recptr;
  }
***************
*** 3030,3039 ****
--- 3036,3048 ----
        Relation        reln;
        Buffer          buffer;
        Page            page;
+       bool            freeze;
  
        if (record->xl_info & XLR_BKP_BLOCK_1)
                return;
  
+       freeze = record->xl_info & XLOG_HEAP_FREEZE;
+ 
        reln = XLogOpenRelation(xlrec->node);
        buffer = XLogReadBuffer(reln, xlrec->block, false);
        if (!BufferIsValid(buffer))
***************
*** 3048,3069 ****
  
        if (record->xl_len > SizeOfHeapClean)
        {
!               OffsetNumber *unused;
!               OffsetNumber *unend;
                ItemId          lp;
  
!               unused = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
!               unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
  
!               while (unused < unend)
                {
!                       lp = PageGetItemId(page, *unused + 1);
!                       lp->lp_flags &= ~LP_USED;
!                       unused++;
                }
        }
  
!       PageRepairFragmentation(page, NULL);
  
        PageSetLSN(page, lsn);
        PageSetTLI(page, ThisTimeLineID);
--- 3057,3089 ----
  
        if (record->xl_len > SizeOfHeapClean)
        {
!               OffsetNumber *offsets;
!               OffsetNumber *offend;
                ItemId          lp;
  
!               offsets = (OffsetNumber *) ((char *) xlrec + SizeOfHeapClean);
!               offend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
  
!               while (offsets < offend)
                {
!                       lp = PageGetItemId(page, *offsets + 1);
! 
!                       if(freeze)
!                       {
!                               HeapTupleHeader htup = (HeapTupleHeader) 
PageGetItem(page, lp);
!                               
!                               Assert(!(htup->t_infomask & HEAP_XMIN_INVALID));
! 
!                               htup->t_infomask |= HEAP_XMIN_COMMITTED;
!                               HeapTupleHeaderSetXmin(htup, 
FrozenTransactionId);
!                       } else
!                               lp->lp_flags &= ~LP_USED;
!                       offsets++;
                }
        }
  
!       if(!freeze)
!               PageRepairFragmentation(page, NULL);
  
        PageSetLSN(page, lsn);
        PageSetTLI(page, ThisTimeLineID);
Index: src/backend/commands/vacuum.c
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/backend/commands/vacuum.c,v
retrieving revision 1.341
diff -c -r1.341 vacuum.c
*** src/backend/commands/vacuum.c       4 Oct 2006 00:29:51 -0000       1.341
--- src/backend/commands/vacuum.c       23 Oct 2006 18:36:07 -0000
***************
*** 1357,1364 ****
                Buffer          buf;
                OffsetNumber offnum,
                                        maxoff;
!               bool            pgchanged,
!                                       notup;
  
                vacuum_delay_point();
  
--- 1357,1365 ----
                Buffer          buf;
                OffsetNumber offnum,
                                        maxoff;
!               bool            notup;
!               OffsetNumber frozen[MaxOffsetNumber];
!               int                     nfrozen;
  
                vacuum_delay_point();
  
***************
*** 1414,1420 ****
                        continue;
                }
  
!               pgchanged = false;
                notup = true;
                maxoff = PageGetMaxOffsetNumber(page);
                for (offnum = FirstOffsetNumber;
--- 1415,1421 ----
                        continue;
                }
  
!               nfrozen = 0;
                notup = true;
                maxoff = PageGetMaxOffsetNumber(page);
                for (offnum = FirstOffsetNumber;
***************
*** 1458,1464 ****
                                                
HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
                                                /* infomask should be okay 
already */
                                                Assert(tuple.t_data->t_infomask 
& HEAP_XMIN_COMMITTED);
!                                               pgchanged = true;
                                        }
  
                                        /*
--- 1459,1465 ----
                                                
HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
                                                /* infomask should be okay 
already */
                                                Assert(tuple.t_data->t_infomask 
& HEAP_XMIN_COMMITTED);
!                                               frozen[nfrozen++] = offnum;
                                        }
  
                                        /*
***************
*** 1627,1634 ****
                else
                        empty_end_pages = 0;
  
!               if (pgchanged)
                        MarkBufferDirty(buf);
                UnlockReleaseBuffer(buf);
        }
  
--- 1628,1650 ----
                else
                        empty_end_pages = 0;
  
!               /* 
!                * If we froze any tuples, write a WAL record. We used to treat
!                * freezing the same as hint bit updates, because it was 
thought that 
!                * losing a tuple freeze doesn't matter since the tuple is 
marked as
!                * committed anyway. But that's not safe: if we later truncate 
the 
!                * clog and crash, we might end up with xids on the disk that 
belonged
!                * to a truncated clog segment.
!                */
!               if (nfrozen > 0)
!               {
!                       XLogRecPtr recptr;
! 
                        MarkBufferDirty(buf);
+                       recptr = log_heap_clean(onerel, buf, frozen, nfrozen, 
true);
+                       PageSetLSN(page, recptr);
+                       PageSetTLI(page, ThisTimeLineID);
+               }
                UnlockReleaseBuffer(buf);
        }
  
***************
*** 2603,2609 ****
                        {
                                XLogRecPtr      recptr;
  
!                               recptr = log_heap_clean(onerel, buf, unused, 
uncnt);
                                PageSetLSN(page, recptr);
                                PageSetTLI(page, ThisTimeLineID);
                        }
--- 2619,2625 ----
                        {
                                XLogRecPtr      recptr;
  
!                               recptr = log_heap_clean(onerel, buf, unused, 
uncnt, false);
                                PageSetLSN(page, recptr);
                                PageSetTLI(page, ThisTimeLineID);
                        }
***************
*** 3074,3080 ****
        {
                XLogRecPtr      recptr;
  
!               recptr = log_heap_clean(onerel, buffer, unused, uncnt);
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
        }
--- 3090,3096 ----
        {
                XLogRecPtr      recptr;
  
!               recptr = log_heap_clean(onerel, buffer, unused, uncnt, false);
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
        }
Index: src/backend/commands/vacuumlazy.c
===================================================================
RCS file: 
/home/hlinnaka/pgcvsrepository/pgsql/src/backend/commands/vacuumlazy.c,v
retrieving revision 1.80
diff -c -r1.80 vacuumlazy.c
*** src/backend/commands/vacuumlazy.c   4 Oct 2006 00:29:52 -0000       1.80
--- src/backend/commands/vacuumlazy.c   23 Oct 2006 18:35:52 -0000
***************
*** 266,275 ****
                Page            page;
                OffsetNumber offnum,
                                        maxoff;
!               bool            pgchanged,
!                                       tupgone,
                                        hastup;
                int                     prev_dead_count;
  
                vacuum_delay_point();
  
--- 266,276 ----
                Page            page;
                OffsetNumber offnum,
                                        maxoff;
!               bool            tupgone,
                                        hastup;
                int                     prev_dead_count;
+               OffsetNumber frozen[MaxOffsetNumber];
+               int                     nfrozen;
  
                vacuum_delay_point();
  
***************
*** 349,355 ****
                        continue;
                }
  
!               pgchanged = false;
                hastup = false;
                prev_dead_count = vacrelstats->num_dead_tuples;
                maxoff = PageGetMaxOffsetNumber(page);
--- 350,356 ----
                        continue;
                }
  
!               nfrozen = 0;
                hastup = false;
                prev_dead_count = vacrelstats->num_dead_tuples;
                maxoff = PageGetMaxOffsetNumber(page);
***************
*** 398,404 ****
                                                
HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
                                                /* infomask should be okay 
already */
                                                Assert(tuple.t_data->t_infomask 
& HEAP_XMIN_COMMITTED);
!                                               pgchanged = true;
                                        }
  
                                        /*
--- 399,405 ----
                                                
HeapTupleHeaderSetXmin(tuple.t_data, FrozenTransactionId);
                                                /* infomask should be okay 
already */
                                                Assert(tuple.t_data->t_infomask 
& HEAP_XMIN_COMMITTED);
!                                               frozen[nfrozen++] = offnum;
                                        }
  
                                        /*
***************
*** 485,492 ****
                if (hastup)
                        vacrelstats->nonempty_pages = blkno + 1;
  
!               if (pgchanged)
                        MarkBufferDirty(buf);
                UnlockReleaseBuffer(buf);
        }
  
--- 486,508 ----
                if (hastup)
                        vacrelstats->nonempty_pages = blkno + 1;
  
!               /* 
!                * If we froze any tuples, write a WAL record. We used to treat
!                * freezing the same as hint bit updates, because it was 
thought that 
!                * losing a tuple freeze doesn't matter since the tuple is 
marked as
!                * committed anyway. But that's not safe: if we later truncate 
the 
!                * clog and crash, we might end up with xids on the disk that 
belonged
!                * to a truncated clog segment.
!                */
!               if (nfrozen > 0)
!               {
!                       XLogRecPtr recptr;
! 
                        MarkBufferDirty(buf);
+                       recptr = log_heap_clean(onerel, buf, frozen, nfrozen, 
true);
+                       PageSetLSN(page, recptr);
+                       PageSetTLI(page, ThisTimeLineID);
+               }
                UnlockReleaseBuffer(buf);
        }
  
***************
*** 635,641 ****
        {
                XLogRecPtr      recptr;
  
!               recptr = log_heap_clean(onerel, buffer, unused, uncnt);
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
        }
--- 651,657 ----
        {
                XLogRecPtr      recptr;
  
!               recptr = log_heap_clean(onerel, buffer, unused, uncnt, false);
                PageSetLSN(page, recptr);
                PageSetTLI(page, ThisTimeLineID);
        }
Index: src/include/access/heapam.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/heapam.h,v
retrieving revision 1.116
diff -c -r1.116 heapam.h
*** src/include/access/heapam.h 4 Oct 2006 00:30:07 -0000       1.116
--- src/include/access/heapam.h 23 Oct 2006 17:52:27 -0000
***************
*** 182,188 ****
  extern void heap_redo(XLogRecPtr lsn, XLogRecord *rptr);
  extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec);
  extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
!                          OffsetNumber *unused, int uncnt);
  extern XLogRecPtr log_heap_move(Relation reln, Buffer oldbuf,
                          ItemPointerData from,
                          Buffer newbuf, HeapTuple newtup);
--- 182,188 ----
  extern void heap_redo(XLogRecPtr lsn, XLogRecord *rptr);
  extern void heap_desc(StringInfo buf, uint8 xl_info, char *rec);
  extern XLogRecPtr log_heap_clean(Relation reln, Buffer buffer,
!                          OffsetNumber *offsets, int noffsets, bool freeze);
  extern XLogRecPtr log_heap_move(Relation reln, Buffer oldbuf,
                          ItemPointerData from,
                          Buffer newbuf, HeapTuple newtup);
Index: src/include/access/htup.h
===================================================================
RCS file: /home/hlinnaka/pgcvsrepository/pgsql/src/include/access/htup.h,v
retrieving revision 1.86
diff -c -r1.86 htup.h
*** src/include/access/htup.h   4 Oct 2006 00:30:07 -0000       1.86
--- src/include/access/htup.h   23 Oct 2006 17:14:44 -0000
***************
*** 510,515 ****
--- 510,521 ----
   * we can (and we do) restore entire page in redo
   */
  #define XLOG_HEAP_INIT_PAGE 0x80
+ /*
+  * XLOG_HEAP_CLEAN | XLOG_HEAP_FREEZE means that tuples on this page
+  * should be frozen. We can share the bit with XLOG_HEAP_INIT_PAGE,
+  * because it's not used when cleaning.
+  */
+ #define XLOG_HEAP_FREEZE      0x80
  
  /*
   * All what we need to find changed tuple

---------------------------(end of broadcast)---------------------------
TIP 5: don't forget to increase your free space map settings

Reply via email to