On Sat, Jan 12, 2013 at 02:14:03PM -0500, Kevin Grittner wrote:
> Amit Kapila wrote:
> > On Thursday, January 10, 2013 6:09 AM Josh Berkus wrote:
> 
> >> Surely VACUUM FULL should rebuild the visibility map, and make
> >> tuples in the new relation all-visible, no?
> 
> Certainly it seems odd to me that VACUUM FULL leaves the the table
> in a less-well maintained state in terms of visibility than a
> "normal" vacuum. VACUUM FULL should not need to be followed by
> another VACUUM.
> 
> > I think it cannot made all visible.
> 
> I don't think all tuples in the relation are necessarily visible to
> all transactions, but the ones which are should probably be flagged
> that way.

I have developed the attached proof-of-concept patch to fix the problem
of having no visibility map after CLUSTER or VACUUM FULL.  I tested with
these queries:

        CREATE TABLE test(x INT PRIMARY KEY);
        INSERT INTO test VALUES (1);
        VACUUM FULL test; -- or CLUSTER
        SELECT relfilenode FROM pg_class WHERE relname = 'test';
         relfilenode
        -------------
               16399

Then 'ls -l data/base/16384/16399*' to see the *_vm file.  I am not sure
how to test that the vm contents are valid.

This patch is fairly tricky because our CLUSTER/VACUUM FULL behavior
does not do writes through the shared buffer cache, as outlined in this
C comment block:

 * We can't use the normal heap_insert function to insert into the new
 * heap, because heap_insert overwrites the visibility information.
 * We use a special-purpose raw_heap_insert function instead, which
 * is optimized for bulk inserting a lot of tuples, knowing that we have
 * exclusive access to the heap.  raw_heap_insert builds new pages in
 * local storage.  When a page is full, or at the end of the process,
 * we insert it to WAL as a single record and then write it to disk
 * directly through smgr.  Note, however, that any data sent to the new
 * heap's TOAST table will go through the normal bufmgr.

I originally tried to do this higher up in the stack but ran into
problems because I couldn't access the new heap page so I had to do it
at the non-shared-buffer page level.  I reused the lazy vacuum routines.

I need to know this is the right approach, and need to know what things
are wrong or missing.

-- 
  Bruce Momjian  <br...@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + Everyone has their own god. +
diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c
new file mode 100644
index 951894c..c01a6a8
*** a/src/backend/access/heap/rewriteheap.c
--- b/src/backend/access/heap/rewriteheap.c
***************
*** 107,112 ****
--- 107,114 ----
  #include "access/rewriteheap.h"
  #include "access/transam.h"
  #include "access/tuptoaster.h"
+ #include "access/visibilitymap.h"
+ #include "commands/vacuum.h"
  #include "storage/bufmgr.h"
  #include "storage/smgr.h"
  #include "utils/memutils.h"
*************** typedef OldToNewMappingData *OldToNewMap
*** 172,177 ****
--- 174,180 ----
  
  /* prototypes for internal functions */
  static void raw_heap_insert(RewriteState state, HeapTuple tup);
+ static void update_page_vm(Relation relation, Page page, BlockNumber blkno);
  
  
  /*
*************** end_heap_rewrite(RewriteState state)
*** 281,286 ****
--- 284,290 ----
  		RelationOpenSmgr(state->rs_new_rel);
  
  		PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
+ 		update_page_vm(state->rs_new_rel, state->rs_buffer, state->rs_blockno);
  
  		smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM, state->rs_blockno,
  				   (char *) state->rs_buffer, true);
*************** raw_heap_insert(RewriteState state, Heap
*** 633,638 ****
--- 637,643 ----
  			RelationOpenSmgr(state->rs_new_rel);
  
  			PageSetChecksumInplace(page, state->rs_blockno);
+ 			update_page_vm(state->rs_new_rel, page, state->rs_blockno);
  
  			smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM,
  					   state->rs_blockno, (char *) page, true);
*************** raw_heap_insert(RewriteState state, Heap
*** 677,679 ****
--- 682,704 ----
  	if (heaptup != tup)
  		heap_freetuple(heaptup);
  }
+ 
+ static void
+ update_page_vm(Relation relation, Page page, BlockNumber blkno)
+ {
+ 	Buffer		vmbuffer = InvalidBuffer;
+ 	TransactionId visibility_cutoff_xid;
+ 
+ 	visibilitymap_pin(relation, blkno, &vmbuffer);
+ 	Assert(BufferIsValid(vmbuffer));
+ 
+ 	if (!visibilitymap_test(relation, blkno, &vmbuffer) &&
+ 		heap_page_is_all_visible(relation, InvalidBuffer, page,
+ 								 &visibility_cutoff_xid))
+ 	{
+ 		PageSetAllVisible(page);
+ 		visibilitymap_set(relation, blkno, InvalidBlockNumber,
+ 						  InvalidXLogRecPtr, vmbuffer, visibility_cutoff_xid);
+ 	}
+ 	ReleaseBuffer(vmbuffer);
+ }
diff --git a/src/backend/access/heap/visibilitymap.c b/src/backend/access/heap/visibilitymap.c
new file mode 100644
index 7f40d89..a42511b
*** a/src/backend/access/heap/visibilitymap.c
--- b/src/backend/access/heap/visibilitymap.c
*************** visibilitymap_set(Relation rel, BlockNum
*** 278,284 ****
  		map[mapByte] |= (1 << mapBit);
  		MarkBufferDirty(vmBuf);
  
! 		if (RelationNeedsWAL(rel))
  		{
  			if (XLogRecPtrIsInvalid(recptr))
  			{
--- 278,284 ----
  		map[mapByte] |= (1 << mapBit);
  		MarkBufferDirty(vmBuf);
  
! 		if (RelationNeedsWAL(rel) && BufferIsValid(heapBuf))
  		{
  			if (XLogRecPtrIsInvalid(recptr))
  			{
diff --git a/src/backend/commands/vacuumlazy.c b/src/backend/commands/vacuumlazy.c
new file mode 100644
index 6688ab3..ea349a5
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
*************** static void lazy_record_dead_tuple(LVRel
*** 151,158 ****
  					   ItemPointer itemptr);
  static bool lazy_tid_reaped(ItemPointer itemptr, void *state);
  static int	vac_cmp_itemptr(const void *left, const void *right);
- static bool heap_page_is_all_visible(Relation rel, Buffer buf,
- 						 TransactionId *visibility_cutoff_xid);
  
  
  /*
--- 151,156 ----
*************** lazy_vacuum_page(Relation onerel, BlockN
*** 1187,1193 ****
  	 * check if the page has become all-visible.
  	 */
  	if (!visibilitymap_test(onerel, blkno, vmbuffer) &&
! 		heap_page_is_all_visible(onerel, buffer, &visibility_cutoff_xid))
  	{
  		Assert(BufferIsValid(*vmbuffer));
  		PageSetAllVisible(page);
--- 1185,1191 ----
  	 * check if the page has become all-visible.
  	 */
  	if (!visibilitymap_test(onerel, blkno, vmbuffer) &&
! 		heap_page_is_all_visible(onerel, buffer, NULL, &visibility_cutoff_xid))
  	{
  		Assert(BufferIsValid(*vmbuffer));
  		PageSetAllVisible(page);
*************** vac_cmp_itemptr(const void *left, const
*** 1694,1707 ****
   * transactions. Also return the visibility_cutoff_xid which is the highest
   * xmin amongst the visible tuples.
   */
! static bool
! heap_page_is_all_visible(Relation rel, Buffer buf, TransactionId *visibility_cutoff_xid)
  {
- 	Page		page = BufferGetPage(buf);
  	OffsetNumber offnum,
  				maxoff;
  	bool		all_visible = true;
  
  	*visibility_cutoff_xid = InvalidTransactionId;
  
  	/*
--- 1692,1708 ----
   * transactions. Also return the visibility_cutoff_xid which is the highest
   * xmin amongst the visible tuples.
   */
! bool
! heap_page_is_all_visible(Relation rel, Buffer buf, Page page,
! 						 TransactionId *visibility_cutoff_xid)
  {
  	OffsetNumber offnum,
  				maxoff;
  	bool		all_visible = true;
  
+ 	if (BufferIsValid(buf))
+ 		page = BufferGetPage(buf);
+ 
  	*visibility_cutoff_xid = InvalidTransactionId;
  
  	/*
*************** heap_page_is_all_visible(Relation rel, B
*** 1722,1728 ****
  		if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
  			continue;
  
! 		ItemPointerSet(&(tuple.t_self), BufferGetBlockNumber(buf), offnum);
  
  		/*
  		 * Dead line pointers can have index pointers pointing to them. So
--- 1723,1731 ----
  		if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
  			continue;
  
! 		/* XXX use 0 or real offset? */
! 		ItemPointerSet(&(tuple.t_self), BufferIsValid(buf) ?
! 					   BufferGetBlockNumber(buf) : 0, offnum);
  
  		/*
  		 * Dead line pointers can have index pointers pointing to them. So
diff --git a/src/backend/utils/time/tqual.c b/src/backend/utils/time/tqual.c
new file mode 100644
index ed66c49..7ad8b58
*** a/src/backend/utils/time/tqual.c
--- b/src/backend/utils/time/tqual.c
*************** static inline void
*** 108,113 ****
--- 108,117 ----
  SetHintBits(HeapTupleHeader tuple, Buffer buffer,
  			uint16 infomask, TransactionId xid)
  {
+ 	/* we might not have a buffer if we are doing raw_heap_insert() */
+ 	if (!BufferIsValid(buffer))
+ 		return;
+ 
  	if (TransactionIdIsValid(xid))
  	{
  		/* NB: xid must be known committed here! */
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
new file mode 100644
index 08bec25..04cd4b7
*** a/src/include/commands/vacuum.h
--- b/src/include/commands/vacuum.h
***************
*** 19,24 ****
--- 19,25 ----
  #include "catalog/pg_type.h"
  #include "nodes/parsenodes.h"
  #include "storage/buf.h"
+ #include "storage/bufpage.h"
  #include "storage/lock.h"
  #include "utils/relcache.h"
  
*************** extern void vacuum_delay_point(void);
*** 167,172 ****
--- 168,175 ----
  /* in commands/vacuumlazy.c */
  extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
  				BufferAccessStrategy bstrategy);
+ extern bool heap_page_is_all_visible(Relation rel, Buffer buf, Page page,
+ 						 TransactionId *visibility_cutoff_xid);
  
  /* in commands/analyze.c */
  extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to