The following patch implements a fairly light set of timing statements
aimed at understanding external sort performance. There is no attempt to
alter the algorithms.

Each major point in the algorithms is marked as shown in this example:

postgres=# set debug_sort=true;
SET

postgres=# explain analyze select * from test2 order by col1,col2;

NOTICE:  tuplesort begin  work_mem= 1024
NOTICE:  +0 secs  heap sort  nkeys= 2
NOTICE:  +0 secs  switching to external sort
NOTICE:  +1129 secs  starting build of next run
NOTICE:  +2229 secs  run building complete  nruns= 2
NOTICE:  +2229 secs  merging runs with 6 tapes

...

NOTICE:  +3036 secs  starting final merge

I'll add other information, as requested.

The "6 tapes" is currently hardcoded, though is included in expectation
of implementing variable numbers of tapes.

I'm not sure if I got the header file correct for full portability of
gettimeofday(). Please correct me, if this is the case.

Please post sort performance data back via this post.

Best Regards, Simon Riggs
Index: src/backend/utils/misc/guc.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/misc/guc.c,v
retrieving revision 1.289
diff -c -c -r1.289 guc.c
*** src/backend/utils/misc/guc.c	24 Sep 2005 22:54:39 -0000	1.289
--- src/backend/utils/misc/guc.c	2 Oct 2005 18:00:45 -0000
***************
*** 92,97 ****
--- 92,98 ----
  extern int	CommitSiblings;
  extern char *default_tablespace;
  extern bool	fullPageWrites;
+ extern bool debug_sort;
  
  static const char *assign_log_destination(const char *value,
  					   bool doit, GucSource source);
***************
*** 764,770 ****
  		false, NULL, NULL
  	},
  #endif
! 
  	{
  		{"log_hostname", PGC_SIGHUP, LOGGING_WHAT,
  			gettext_noop("Logs the host name in the connection logs."),
--- 765,779 ----
  		false, NULL, NULL
  	},
  #endif
! 	{
! 		{"debug_sort", PGC_USERSET, DEVELOPER_OPTIONS,
! 			gettext_noop("no description available"),
! 			NULL,
! 			GUC_NOT_IN_SAMPLE
! 		},
! 		&debug_sort,
! 		false, NULL, NULL
! 	},
  	{
  		{"log_hostname", PGC_SIGHUP, LOGGING_WHAT,
  			gettext_noop("Logs the host name in the connection logs."),
Index: src/backend/utils/sort/tuplesort.c
===================================================================
RCS file: /projects/cvsroot/pgsql/src/backend/utils/sort/tuplesort.c,v
retrieving revision 1.50
diff -c -c -r1.50 tuplesort.c
*** src/backend/utils/sort/tuplesort.c	23 Sep 2005 15:36:57 -0000	1.50
--- src/backend/utils/sort/tuplesort.c	2 Oct 2005 18:00:46 -0000
***************
*** 394,399 ****
--- 394,402 ----
   */
  static Tuplesortstate *qsort_tuplesortstate;
  
+ /* debug variables */
+ bool debug_sort = false;
+ struct timeval start_t, current_t;
  
  /*
   *		tuplesort_begin_xxx
***************
*** 422,427 ****
--- 425,437 ----
  
  	state = (Tuplesortstate *) palloc0(sizeof(Tuplesortstate));
  
+     if (debug_sort)
+ 	{
+     	ereport(NOTICE,
+ 			(errmsg("tuplesort begin  work_mem= %u", workMem)));
+        	gettimeofday(&start_t, NULL);
+     }
+ 
  	state->status = TSS_INITIAL;
  	state->randomAccess = randomAccess;
  	state->availMem = workMem * 1024L;
***************
*** 454,459 ****
--- 464,473 ----
  	Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess);
  	int			i;
  
+     if (debug_sort)
+ 		ereport(NOTICE,
+ 			(errmsg("+0 secs  heap sort  nkeys= %u", nkeys)));
+ 
  	AssertArg(nkeys > 0);
  
  	state->comparetup = comparetup_heap;
***************
*** 499,504 ****
--- 513,522 ----
  {
  	Tuplesortstate *state = tuplesort_begin_common(workMem, randomAccess);
  
+     if (debug_sort)
+ 		ereport(NOTICE,
+ 			(errmsg("+0 secs  index sort")));
+ 
  	state->comparetup = comparetup_index;
  	state->copytup = copytup_index;
  	state->writetup = writetup_index;
***************
*** 666,671 ****
--- 684,697 ----
  			/*
  			 * Nope; time to switch to tape-based operation.
  			 */
+             if (debug_sort)
+             {
+            		gettimeofday(&current_t, NULL);
+         		ereport(NOTICE,
+         			(errmsg("+%ld secs  switching to external sort", 
+         					(long) (current_t.tv_sec - start_t.tv_sec),
+                             state->currentRun)));
+             }
  			inittapes(state);
  
  			/*
***************
*** 1042,1047 ****
--- 1068,1082 ----
  	int			j;
  	int			a;
  
+     if (debug_sort)
+     {
+    		gettimeofday(&current_t, NULL);
+ 		ereport(NOTICE,
+ 			(errmsg("+%ld secs  starting build of next run", 
+ 					(long) (current_t.tv_sec - start_t.tv_sec),
+                     state->currentRun)));
+     }
+ 
  	/* Step D3: advance j (destTape) */
  	if (state->tp_dummy[state->destTape] < state->tp_dummy[state->destTape + 1])
  	{
***************
*** 1082,1087 ****
--- 1117,1131 ----
  	Assert(state->status == TSS_BUILDRUNS);
  	Assert(state->memtupcount == 0);
  
+     if (debug_sort)
+     {
+    		gettimeofday(&current_t, NULL);
+ 		ereport(NOTICE,
+ 			(errmsg("+%ld secs  run building complete  nruns= %u", 
+ 					(long) (current_t.tv_sec - start_t.tv_sec),
+                     state->currentRun)));
+     }
+ 
  	/*
  	 * If we produced only one initial run (quite likely if the total data
  	 * volume is between 1X and 2X workMem), we can just use that tape as
***************
*** 1093,1100 ****
--- 1137,1158 ----
  		/* must freeze and rewind the finished output tape */
  		LogicalTapeFreeze(state->tapeset, state->result_tape);
  		state->status = TSS_SORTEDONTAPE;
+         if (debug_sort)
+         {
+        		gettimeofday(&current_t, NULL);
+        		ereport(NOTICE,
+        			(errmsg("+%ld secs  no final merge required", 
+    					(long) (current_t.tv_sec - start_t.tv_sec))));
+         }
  		return;
  	}
+     if (debug_sort)
+     {
+    		gettimeofday(&current_t, NULL);
+    		ereport(NOTICE,
+    			(errmsg("+%ld secs  merging runs with 6 tapes", 
+     			(long) (current_t.tv_sec - start_t.tv_sec))));
+     }
  
  	/* End of step D2: rewind all output tapes to prepare for merging */
  	for (tapenum = 0; tapenum < TAPERANGE; tapenum++)
***************
*** 1126,1131 ****
--- 1184,1196 ----
  				/* Initialize for the final merge pass */
  				beginmerge(state);
  				state->status = TSS_FINALMERGE;
+                 if (debug_sort)
+                 {
+                		gettimeofday(&current_t, NULL);
+             		ereport(NOTICE,
+             			(errmsg("+%ld secs  starting final merge", 
+         					(long) (current_t.tv_sec - start_t.tv_sec))));
+                 }
  				return;
  			}
  			if (allDummy)
***************
*** 1177,1182 ****
--- 1242,1254 ----
  	state->result_tape = state->tp_tapenum[TAPERANGE];
  	LogicalTapeFreeze(state->tapeset, state->result_tape);
  	state->status = TSS_SORTEDONTAPE;
+     if (debug_sort)
+     {
+     	gettimeofday(&current_t, NULL);
+    		ereport(NOTICE,
+         	(errmsg("+%ld secs  starting final merge", 
+    					(long) (current_t.tv_sec - start_t.tv_sec))));
+     }
  }
  
  /*
***************
*** 1243,1248 ****
--- 1315,1327 ----
  	 */
  	markrunend(state, destTape);
  	state->tp_runs[TAPERANGE]++;
+     if (debug_sort)
+     {
+     	gettimeofday(&current_t, NULL);
+    		ereport(NOTICE,
+         	(errmsg("+%ld secs  intermediate merge", 
+    					(long) (current_t.tv_sec - start_t.tv_sec))));
+     }
  }
  
  /*
---------------------------(end of broadcast)---------------------------
TIP 1: if posting/reading through Usenet, please send an appropriate
       subscribe-nomail command to [EMAIL PROTECTED] so that your
       message can get through to the mailing list cleanly

Reply via email to