I enclose a complete patch for avoiding WAL usage for CREATE TABLE AS
SELECT, when not in archive mode (PITR). The main use case for this is
large BI environments that create summary tables or prejoined tables,
though there are many general applications.

There is no user interface for this. The speed gain is automatic, when
archiving is not enabled.

This contains all the lower level machinery required to do the same
thing for COPY, as discussed on hackers. The machinery includes some
additional freespace thinkery, aimed mainly at the forthcoming COPY
patch, which solely needs to be integrated with Alon's work.

Patch is diff -c format, compiles and make checks on cvstip as of now. 

No performance tests *on this patch*, though the general principle has
already been proven via a similar prototype patch not published on list.

Best Regards, Simon Riggs
Index: src/backend/access/heap/heapam.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/heap/heapam.c,v
retrieving revision 1.191
diff -c -c -r1.191 heapam.c
*** src/backend/access/heap/heapam.c	19 May 2005 21:35:44 -0000	1.191
--- src/backend/access/heap/heapam.c	3 Jun 2005 21:58:58 -0000
***************
*** 1036,1042 ****
   * command ID.
   */
  Oid
! heap_insert(Relation relation, HeapTuple tup, CommandId cid)
  {
  	TransactionId xid = GetCurrentTransactionId();
  	Buffer		buffer;
--- 1036,1042 ----
   * command ID.
   */
  Oid
! heap_insert(Relation relation, HeapTuple tup, CommandId cid, bool use_wal)
  {
  	TransactionId xid = GetCurrentTransactionId();
  	Buffer		buffer;
***************
*** 1086,1092 ****
  		heap_tuple_toast_attrs(relation, tup, NULL);
  
  	/* Find buffer to insert this tuple into */
! 	buffer = RelationGetBufferForTuple(relation, tup->t_len, InvalidBuffer);
  
  	/* NO EREPORT(ERROR) from here till changes are logged */
  	START_CRIT_SECTION();
--- 1086,1093 ----
  		heap_tuple_toast_attrs(relation, tup, NULL);
  
  	/* Find buffer to insert this tuple into */
! 	buffer = RelationGetBufferForTuple(relation, tup->t_len, 
!                                         InvalidBuffer, use_wal);
  
  	/* NO EREPORT(ERROR) from here till changes are logged */
  	START_CRIT_SECTION();
***************
*** 1096,1102 ****
  	pgstat_count_heap_insert(&relation->pgstat_info);
  
  	/* XLOG stuff */
! 	if (!relation->rd_istemp)
  	{
  		xl_heap_insert xlrec;
  		xl_heap_header xlhdr;
--- 1097,1103 ----
  	pgstat_count_heap_insert(&relation->pgstat_info);
  
  	/* XLOG stuff */
! 	if (!relation->rd_istemp && use_wal)
  	{
  		xl_heap_insert xlrec;
  		xl_heap_header xlhdr;
***************
*** 1181,1187 ****
  Oid
  simple_heap_insert(Relation relation, HeapTuple tup)
  {
! 	return heap_insert(relation, tup, GetCurrentCommandId());
  }
  
  /*
--- 1182,1188 ----
  Oid
  simple_heap_insert(Relation relation, HeapTuple tup)
  {
! 	return heap_insert(relation, tup, GetCurrentCommandId(), true);
  }
  
  /*
***************
*** 1740,1746 ****
  		{
  			/* Assume there's no chance to put newtup on same page. */
  			newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
! 											   buffer);
  		}
  		else
  		{
--- 1741,1747 ----
  		{
  			/* Assume there's no chance to put newtup on same page. */
  			newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
! 											   buffer, true);
  		}
  		else
  		{
***************
*** 1757,1763 ****
  				 */
  				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
  				newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
! 												   buffer);
  			}
  			else
  			{
--- 1758,1764 ----
  				 */
  				LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
  				newbuf = RelationGetBufferForTuple(relation, newtup->t_len,
! 												   buffer, true);
  			}
  			else
  			{
Index: src/backend/access/heap/hio.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/access/heap/hio.c,v
retrieving revision 1.56
diff -c -c -r1.56 hio.c
*** src/backend/access/heap/hio.c	7 May 2005 21:32:23 -0000	1.56
--- src/backend/access/heap/hio.c	3 Jun 2005 21:58:58 -0000
***************
*** 84,90 ****
   */
  Buffer
  RelationGetBufferForTuple(Relation relation, Size len,
! 						  Buffer otherBuffer)
  {
  	Buffer		buffer = InvalidBuffer;
  	Page		pageHeader;
--- 84,90 ----
   */
  Buffer
  RelationGetBufferForTuple(Relation relation, Size len,
! 						  Buffer otherBuffer, bool use_wal)
  {
  	Buffer		buffer = InvalidBuffer;
  	Page		pageHeader;
***************
*** 121,150 ****
  	 * on each page that proves not to be suitable.)  If the FSM has no
  	 * record of a page with enough free space, we give up and extend the
  	 * relation.
! 	 */
  
  	targetBlock = relation->rd_targblock;
  
  	if (targetBlock == InvalidBlockNumber)
  	{
! 		/*
! 		 * We have no cached target page, so ask the FSM for an initial
! 		 * target.
! 		 */
! 		targetBlock = GetPageWithFreeSpace(&relation->rd_node, len);
! 
! 		/*
! 		 * If the FSM knows nothing of the rel, try the last page before
! 		 * we give up and extend.  This avoids one-tuple-per-page syndrome
! 		 * during bootstrapping or in a recently-started system.
! 		 */
! 		if (targetBlock == InvalidBlockNumber)
! 		{
! 			BlockNumber nblocks = RelationGetNumberOfBlocks(relation);
! 
! 			if (nblocks > 0)
! 				targetBlock = nblocks - 1;
! 		}
  	}
  
  	while (targetBlock != InvalidBlockNumber)
--- 121,170 ----
  	 * on each page that proves not to be suitable.)  If the FSM has no
  	 * record of a page with enough free space, we give up and extend the
  	 * relation.
!      *
!      * If we have chosen to to not use_wal we must be careful to write only
!      * to pages that we ourselves have created during the execution of the
!      * current command. As a result we must avoid FSM completely, as well as
!      * avoiding using the last page (which by definition somebody else wrote).
!      * We must also assume that the relation->rd_targblock cache was
!      * cleared (or at least, never set) before we issue the first call of this
!      * routine, since we still need to use this cache for subsequent calls
!      * even when we chose to not use_wal.
!      * Since we avoid the FSM, any blocks extended are private to
!      * ourselves, so we still need to accumulate certain FSM info for when
!      * we complete the command. see freespace.c for description of that info
!      *
!      * XXX This is arguably not appropriate for new relations when fillfactor
!      * is set at 100% (the current case).
!      */
  
  	targetBlock = relation->rd_targblock;
  
  	if (targetBlock == InvalidBlockNumber)
  	{
!         if (use_wal)
!         {
!     		/*
!     		 * We have no cached target page, so ask the FSM for an initial
!     		 * target.
!     		 */
!     		targetBlock = GetPageWithFreeSpace(&relation->rd_node, len);  
! 
!     		/*
!     		 * If the FSM knows nothing of the rel, try the last page before
!     		 * we give up and extend.  This avoids one-tuple-per-page syndrome
!     		 * during bootstrapping or in a recently-started system.
!     		 */
!     		if (targetBlock == InvalidBlockNumber)
!     		{
!     			BlockNumber nblocks = RelationGetNumberOfBlocks(relation);   
! 
!     			if (nblocks > 0)
!     				targetBlock = nblocks - 1;
!     		}
!         }
!         else
!             AccumulateLocalFreeSpaceInfo(len);    
  	}
  
  	while (targetBlock != InvalidBlockNumber)
***************
*** 209,222 ****
  			ReleaseBuffer(buffer);
  		}
  
! 		/*
! 		 * Update FSM as to condition of this page, and ask for another
! 		 * page to try.
! 		 */
! 		targetBlock = RecordAndGetPageWithFreeSpace(&relation->rd_node,
  													targetBlock,
  													pageFreeSpace,
! 													len);
  	}
  
  	/*
--- 229,246 ----
  			ReleaseBuffer(buffer);
  		}
  
! 
!         if (use_wal)
!         {
!     		/*
!     		 * Update FSM as to condition of this page, and ask for another
!     		 * page to try.
!     		 */
!     		targetBlock = RecordAndGetPageWithFreeSpace(&relation->rd_node,
  													targetBlock,
  													pageFreeSpace,
!     												len);
!         }
  	}
  
  	/*
***************
*** 226,233 ****
  	 * the same time, else we will both try to initialize the same new
  	 * page.  We can skip locking for new or temp relations, however,
  	 * since no one else could be accessing them.
  	 */
! 	needLock = !RELATION_IS_LOCAL(relation);
  
  	if (needLock)
  		LockRelationForExtension(relation, ExclusiveLock);
--- 250,259 ----
  	 * the same time, else we will both try to initialize the same new
  	 * page.  We can skip locking for new or temp relations, however,
  	 * since no one else could be accessing them.
+      * We also skip locking for !use_wal, since we have an ExclusiveLock
+      * on the whole relation
  	 */
! 	needLock = !RELATION_IS_LOCAL(relation) && use_wal;
  
  	if (needLock)
  		LockRelationForExtension(relation, ExclusiveLock);
Index: src/backend/executor/execMain.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/executor/execMain.c,v
retrieving revision 1.249
diff -c -c -r1.249 execMain.c
*** src/backend/executor/execMain.c	22 May 2005 22:30:19 -0000	1.249
--- src/backend/executor/execMain.c	3 Jun 2005 21:59:01 -0000
***************
*** 33,38 ****
--- 33,39 ----
  #include "postgres.h"
  
  #include "access/heapam.h"
+ #include "access/xlog.h"
  #include "catalog/heap.h"
  #include "catalog/namespace.h"
  #include "commands/tablecmds.h"
***************
*** 44,49 ****
--- 45,52 ----
  #include "optimizer/clauses.h"
  #include "optimizer/var.h"
  #include "parser/parsetree.h"
+ #include "storage/freespace.h"
+ #include "storage/smgr.h"
  #include "utils/acl.h"
  #include "utils/guc.h"
  #include "utils/lsyscache.h"
***************
*** 784,792 ****
--- 787,798 ----
  		 * And open the constructed table for writing.
  		 */
  		intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock);
+         if (!XLogArchivingActive())
+             InitLocalFreeSpaceInfo();
  	}
  
  	estate->es_into_relation_descriptor = intoRelationDesc;
+     estate->es_into_relation_use_wal = !XLogArchivingActive();
  
  	queryDesc->tupDesc = tupType;
  	queryDesc->planstate = planstate;
***************
*** 979,985 ****
--- 985,1009 ----
  	 * close the "into" relation if necessary, again keeping lock
  	 */
  	if (estate->es_into_relation_descriptor != NULL)
+     {
+         if (estate->es_into_relation_use_wal)
+         {
+ 
+             Relation rel_use_wal = estate->es_into_relation_descriptor;
+     	    /*
+     	     * We must fsync the relation down to disk before it's
+     	     * safe to commit the transaction.
+              */
+             smgrimmedsync(rel_use_wal->rd_smgr);
+ 
+             /* 
+              * Update the FSM info for the relation. We only do this 
+              * here because we bypassed the FSM earlier.
+              */
+             RecordFreeSpaceInfo(&rel_use_wal->rd_node);
+         }
  		heap_close(estate->es_into_relation_descriptor, NoLock);
+     }
  
  	/*
  	 * close any relations selected FOR UPDATE/FOR SHARE, again keeping locks
***************
*** 1307,1313 ****
  
  		tuple = ExecCopySlotTuple(slot);
  		heap_insert(estate->es_into_relation_descriptor, tuple,
! 					estate->es_snapshot->curcid);
  		/* we know there are no indexes to update */
  		heap_freetuple(tuple);
  		IncrAppended();
--- 1331,1338 ----
  
  		tuple = ExecCopySlotTuple(slot);
  		heap_insert(estate->es_into_relation_descriptor, tuple,
! 					estate->es_snapshot->curcid,
!                     estate->es_into_relation_use_wal);
  		/* we know there are no indexes to update */
  		heap_freetuple(tuple);
  		IncrAppended();
***************
*** 1386,1392 ****
  	 * insert the tuple
  	 */
  	newId = heap_insert(resultRelationDesc, tuple,
! 						estate->es_snapshot->curcid);
  
  	IncrAppended();
  	(estate->es_processed)++;
--- 1411,1417 ----
  	 * insert the tuple
  	 */
  	newId = heap_insert(resultRelationDesc, tuple,
! 						estate->es_snapshot->curcid, true);
  
  	IncrAppended();
  	(estate->es_processed)++;
Index: src/backend/storage/freespace/freespace.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/storage/freespace/freespace.c,v
retrieving revision 1.45
diff -c -c -r1.45 freespace.c
*** src/backend/storage/freespace/freespace.c	29 May 2005 04:23:04 -0000	1.45
--- src/backend/storage/freespace/freespace.c	3 Jun 2005 21:59:03 -0000
***************
*** 248,253 ****
--- 248,254 ----
  static int	fsm_current_chunks(FSMRelation *fsmrel);
  static int	fsm_current_allocation(FSMRelation *fsmrel);
  
+ static int local_avgRequest = 0;
  
  /*
   * Exported routines
***************
*** 391,396 ****
--- 392,436 ----
  	return freepage;
  }
  
+ void
+ InitLocalFreeSpaceInfo(void)
+ {
+     local_avgRequest = 0;
+ }
+ 
+ void
+ AccumulateLocalFreeSpaceInfo(Size spaceNeeded)
+ {
+ 	/*
+ 	 * Update the moving average of space requests.  This code implements
+ 	 * an exponential moving average with an equivalent period of about 63
+ 	 * requests.  Ignore silly requests, however, to ensure that the
+ 	 * average stays sane.
+ 	 */
+ 	if (spaceNeeded > 0 && spaceNeeded < BLCKSZ)
+ 		local_avgRequest += ((int) spaceNeeded - local_avgRequest) / 32;
+ }
+ 
+ void
+ RecordFreeSpaceInfo(RelFileNode *rel)
+ {
+ 	FSMRelation *fsmrel;
+ 
+ 	LWLockAcquire(FreeSpaceLock, LW_EXCLUSIVE);
+ 
+ 	/*
+ 	 * Add rel to the hashtable to ensure it is known
+ 	 */
+ 	fsmrel = create_fsm_rel(rel);
+ 
+ 	fsmrel->avgRequest = (Size) local_avgRequest;
+ 
+ 	LWLockRelease(FreeSpaceLock);
+ 
+     /* Reset local info */
+     InitLocalFreeSpaceInfo();
+ }
+ 
  /*
   * RecordAndGetPageWithFreeSpace - update info about a page and try again.
   *
Index: src/include/access/heapam.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/access/heapam.h,v
retrieving revision 1.100
diff -c -c -r1.100 heapam.h
*** src/include/access/heapam.h	28 Apr 2005 21:47:16 -0000	1.100
--- src/include/access/heapam.h	3 Jun 2005 21:59:05 -0000
***************
*** 156,162 ****
  					ItemPointer tid);
  extern void setLastTid(const ItemPointer tid);
  
! extern Oid	heap_insert(Relation relation, HeapTuple tup, CommandId cid);
  extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid,
  			CommandId cid, Snapshot crosscheck, bool wait);
  extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple tup,
--- 156,163 ----
  					ItemPointer tid);
  extern void setLastTid(const ItemPointer tid);
  
! extern Oid	heap_insert(Relation relation, HeapTuple tup, 
!                             CommandId cid, bool use_wal);
  extern HTSU_Result heap_delete(Relation relation, ItemPointer tid, ItemPointer ctid,
  			CommandId cid, Snapshot crosscheck, bool wait);
  extern HTSU_Result heap_update(Relation relation, ItemPointer otid, HeapTuple tup,
Index: src/include/access/hio.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/access/hio.h,v
retrieving revision 1.27
diff -c -c -r1.27 hio.h
*** src/include/access/hio.h	31 Dec 2004 22:03:21 -0000	1.27
--- src/include/access/hio.h	3 Jun 2005 21:59:05 -0000
***************
*** 19,24 ****
  extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
  					 HeapTuple tuple);
  extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
! 						  Buffer otherBuffer);
  
  #endif   /* HIO_H */
--- 19,24 ----
  extern void RelationPutHeapTuple(Relation relation, Buffer buffer,
  					 HeapTuple tuple);
  extern Buffer RelationGetBufferForTuple(Relation relation, Size len,
! 						  Buffer otherBuffer, bool use_wal);
  
  #endif   /* HIO_H */
Index: src/include/nodes/execnodes.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/nodes/execnodes.h,v
retrieving revision 1.133
diff -c -c -r1.133 execnodes.h
*** src/include/nodes/execnodes.h	14 May 2005 21:29:23 -0000	1.133
--- src/include/nodes/execnodes.h	3 Jun 2005 21:59:08 -0000
***************
*** 305,310 ****
--- 305,311 ----
  												 * elt */
  	JunkFilter *es_junkFilter;	/* currently active junk filter */
  	Relation	es_into_relation_descriptor;	/* for SELECT INTO */
+     bool        es_into_relation_use_wal;
  
  	/* Parameter info: */
  	ParamListInfo es_param_list_info;	/* values of external params */
Index: src/include/storage/freespace.h
===================================================================
RCS file: /projects/cvsroot/pgsql/src/include/storage/freespace.h,v
retrieving revision 1.17
diff -c -c -r1.17 freespace.h
*** src/include/storage/freespace.h	31 Dec 2004 22:03:42 -0000	1.17
--- src/include/storage/freespace.h	3 Jun 2005 21:59:08 -0000
***************
*** 63,68 ****
--- 63,72 ----
  extern void DumpFreeSpaceMap(int code, Datum arg);
  extern void LoadFreeSpaceMap(void);
  
+ extern void InitLocalFreeSpaceInfo(void);
+ extern void AccumulateLocalFreeSpaceInfo(Size spaceNeeded);
+ extern void RecordFreeSpaceInfo(RelFileNode *rel);
+ 
  #ifdef FREESPACE_DEBUG
  extern void DumpFreeSpace(void);
  #endif
---------------------------(end of broadcast)---------------------------
TIP 2: you can get off all lists at once with the unregister command
    (send "unregister YourEmailAddressHere" to [EMAIL PROTECTED])

Reply via email to