This is an update to my earlier patch to add getrusage resource
tracking to EXPLAIN ANALYZE.

With this patch you get something like:

                                                  QUERY PLAN
--------------------------------------------------------------------------------------------------------------
 Seq Scan on i  (cost=0.00..6919.44 rows=262144 width=101) (actual
time=17.240..1123.751 rows=262144 loops=1)
   Resources: sys=210.000ms user=430.000ms read=33.6MB
   Buffers: shared read=4298
 Total runtime: 1548.651 ms
(4 rows)

The main change is to make it work under Windows. At least I think the
changes should make it work under Windows, I haven't been able to test
it. Actually I'm not to happy with the way I did it, I would be more
inclined to hack the getrusagestub,h definition of struct rusage to
have an instr_time in it so that we can use the same macros directly.
But that's more changes than I would be happy making without being
able to compile them to test them.


-- 
greg
*** a/src/backend/commands/explain.c
--- b/src/backend/commands/explain.c
***************
*** 110,115 **** static void ExplainJSONLineEnding(ExplainState *es);
--- 110,116 ----
  static void ExplainYAMLLineStarting(ExplainState *es);
  static void escape_json(StringInfo buf, const char *str);
  static void escape_yaml(StringInfo buf, const char *str);
+ static double normalize_memory(double amount, char **unit, int *precision);
  
  
  
***************
*** 142,147 **** ExplainQuery(ExplainStmt *stmt, const char *queryString,
--- 143,150 ----
  			es.costs = defGetBoolean(opt);
  		else if (strcmp(opt->defname, "buffers") == 0)
  			es.buffers = defGetBoolean(opt);
+ 		else if (strcmp(opt->defname, "resource") == 0)
+ 			es.rusage = defGetBoolean(opt);
  		else if (strcmp(opt->defname, "format") == 0)
  		{
  			char	   *p = defGetString(opt);
***************
*** 368,373 **** ExplainOnePlan(PlannedStmt *plannedstmt, ExplainState *es,
--- 371,378 ----
  		instrument_option |= INSTRUMENT_TIMER;
  	if (es->buffers)
  		instrument_option |= INSTRUMENT_BUFFERS;
+ 	if (es->rusage)
+ 		instrument_option |= INSTRUMENT_RUSAGE;
  
  	/*
  	 * Use a snapshot with an updated command ID to ensure this query sees
***************
*** 1098,1103 **** ExplainNode(PlanState *planstate, List *ancestors,
--- 1103,1199 ----
  			break;
  	}
  
+ 	/* Show resource usage from getrusage */
+ 	if (es->rusage && es->format == EXPLAIN_FORMAT_TEXT)
+ 	{
+ 		const struct rusage *usage = &planstate->instrument->rusage;
+ 
+ 		bool has_rusage = (!INSTR_TIME_IS_ZERO_TV(usage->ru_stime) || 
+ 						   !INSTR_TIME_IS_ZERO_TV(usage->ru_utime) || 
+ 						   usage->ru_inblock > 0 || 
+ 						   usage->ru_oublock > 0);
+ 		bool has_verbose_rusage = (usage->ru_minflt   > 0 || 
+ 								   usage->ru_majflt   > 0 ||
+ 								   usage->ru_nswap    > 0 || 
+ 								   usage->ru_msgsnd   > 0 || 
+ 								   usage->ru_msgrcv   > 0 || 
+ 								   usage->ru_nsignals > 0 || 
+ 								   usage->ru_nvcsw    > 0 || 
+ 								   usage->ru_nivcsw   > 0);
+ 
+ 		if (has_rusage || (es->verbose && has_verbose_rusage))
+ 		{
+ 			appendStringInfoSpaces(es->str, es->indent *2);
+ 			appendStringInfoString(es->str, "Resources:");
+ 			
+ 			if (!INSTR_TIME_IS_ZERO_TV(usage->ru_stime)) 
+ 			{
+ 				double stime = INSTR_TIME_GET_DOUBLE_TV(usage->ru_stime);
+ 				appendStringInfo(es->str, " sys=%.3fms", stime * 1000);
+ 			}
+ 			
+ 			if (!INSTR_TIME_IS_ZERO_TV(usage->ru_utime)) 
+ 			{
+ 				double utime = INSTR_TIME_GET_DOUBLE_TV(usage->ru_utime);
+ 				appendStringInfo(es->str, " user=%.3fms", utime * 1000);
+ 			}
+ 			
+ 			if (usage->ru_inblock > 0)
+ 			{
+ 				double inblock;
+ 				char *units;
+ 				int prec;
+ 				inblock = normalize_memory((double)usage->ru_inblock * 512, &units, &prec);
+ 				appendStringInfo(es->str, " read=%.*f%s", prec, inblock, units);
+ 			}
+ 			if (usage->ru_oublock > 0)
+ 			{
+ 				double oublock;
+ 				char *units;
+ 				int prec;
+ 				oublock = normalize_memory((double)usage->ru_oublock * 512, &units, &prec);
+ 				appendStringInfo(es->str, " written=%.*f%s", prec, oublock, units);
+ 			}
+ 			if (es->verbose)
+ 			{
+ 				if (usage->ru_minflt > 0)
+ 					appendStringInfo(es->str, " minflt=%ld", usage->ru_minflt);
+ 				if (usage->ru_majflt > 0)
+ 					appendStringInfo(es->str, " majflt=%ld", usage->ru_majflt);
+ 				if (usage->ru_nswap > 0)
+ 					appendStringInfo(es->str, " nswap=%ld", usage->ru_nswap);
+ 				if (usage->ru_msgsnd > 0)
+ 					appendStringInfo(es->str, " msgsnd=%ld", usage->ru_msgsnd);
+ 				if (usage->ru_msgrcv > 0)
+ 					appendStringInfo(es->str, " msgrcv=%ld", usage->ru_msgrcv);
+ 				if (usage->ru_nsignals > 0)
+ 					appendStringInfo(es->str, " nsignals=%ld", usage->ru_nsignals);
+ 				if (usage->ru_nvcsw > 0)
+ 					appendStringInfo(es->str, " nvcsw=%ld", usage->ru_nvcsw);
+ 				if (usage->ru_nivcsw > 0)
+ 					appendStringInfo(es->str, " nivcsw=%ld", usage->ru_nivcsw);
+ 			}
+ 			appendStringInfoChar(es->str, '\n');
+ 		}
+ 	} 
+ 	else if (es->rusage) 
+ 	{
+ 		const struct rusage *usage = &planstate->instrument->rusage;
+ 
+ 		ExplainPropertyFloat("User Time", INSTR_TIME_GET_DOUBLE_TV(usage->ru_utime), 3, es);
+ 		ExplainPropertyFloat("System Time", INSTR_TIME_GET_DOUBLE_TV(usage->ru_stime), 3, es);
+ 		ExplainPropertyLong("Minor Page Faults", usage->ru_minflt, es);
+ 		ExplainPropertyLong("Major Page Faults", usage->ru_majflt, es);
+ 		ExplainPropertyLong("Swaps", usage->ru_nswap, es);
+ 		ExplainPropertyLong("Blocks Written", usage->ru_inblock, es);
+ 		ExplainPropertyLong("Blocks Read", usage->ru_oublock, es);
+ 		ExplainPropertyLong("Messages Sent", usage->ru_msgsnd, es);
+ 		ExplainPropertyLong("Messages Received", usage->ru_msgrcv, es);
+ 		ExplainPropertyLong("Signals Received", usage->ru_nsignals, es);
+ 		ExplainPropertyLong("Voluntary Context Switches", usage->ru_nvcsw, es);
+ 		ExplainPropertyLong("Involuntary Context Switches", usage->ru_nivcsw, es);
+ 	}	
+ 
  	/* Show buffer usage */
  	if (es->buffers)
  	{
***************
*** 2216,2218 **** escape_yaml(StringInfo buf, const char *str)
--- 2312,2347 ----
  {
  	escape_json(buf, str);
  }
+ 
+ /*
+  * For a quantity of bytes pick a reasonable display unit for it and
+  * return the quantity in that unit. Also return the unit name and a
+  * reasonable precision via the reference parameters.
+  */
+ 
+ static double normalize_memory(double amount, char **unit, int *precision)
+ {
+ 	static char *units[] = {"bytes", "kB", "MB", "GB", "TB", "PB"};
+ 	char **u = units, **last = units + (sizeof(units)/sizeof(*units)-1);
+ 
+ 	while (amount > 1024.0 && u < last)
+ 	{
+ 		amount /= 1024.0;
+ 		u += 1;
+ 	}
+ 
+ 	*unit = *u;
+ 
+ 	/* if it's bytes or kB then don't print decimals since that's less
+ 	 * than blocksize, otherwise always print 3 significant digits */
+ 	if (u == units || u == units+1 )
+ 		*precision = 0;
+ 	else if (amount < 10)
+ 		*precision = 2;
+ 	else if (amount < 100)
+ 		*precision = 1;
+ 	else
+ 		*precision = 0;
+ 
+ 	return amount;
+ }
*** a/src/backend/executor/instrument.c
--- b/src/backend/executor/instrument.c
***************
*** 21,26 **** BufferUsage pgBufferUsage;
--- 21,29 ----
  
  static void BufferUsageAccumDiff(BufferUsage *dst,
  					 const BufferUsage *add, const BufferUsage *sub);
+ static void ResourceUsageAccumDiff(struct rusage *dst,
+ 					   const struct rusage *add,
+ 					   const struct rusage *sub);
  
  /* Allocate new instrumentation structure(s) */
  Instrumentation *
***************
*** 40,45 **** InstrAlloc(int n, int instrument_options)
--- 43,56 ----
  			instr[i].needs_bufusage = true;
  	}
  
+ 	if (instrument_options & INSTRUMENT_RUSAGE)
+ 	{
+ 		int			i;
+ 
+ 		for (i = 0; i < n; i++)
+ 			instr[i].needs_rusage = true;
+ 	}
+ 
  	return instr;
  }
  
***************
*** 55,60 **** InstrStartNode(Instrumentation *instr)
--- 66,73 ----
  	/* initialize buffer usage per plan node */
  	if (instr->needs_bufusage)
  		instr->bufusage_start = pgBufferUsage;
+ 	if (instr->needs_rusage)
+ 		getrusage(RUSAGE_SELF, &instr->rusage_start);
  }
  
  /* Exit from a plan node */
***************
*** 82,87 **** InstrStopNode(Instrumentation *instr, double nTuples)
--- 95,118 ----
  		BufferUsageAccumDiff(&instr->bufusage,
  							 &pgBufferUsage, &instr->bufusage_start);
  
+ 	if (instr->needs_rusage)
+ 	{
+ 		struct rusage rusage;
+ 
+ 		if (INSTR_TIME_IS_ZERO_TV(instr->rusage_start.ru_utime) ||
+ 			INSTR_TIME_IS_ZERO_TV(instr->rusage_start.ru_stime))
+ 		{
+ 			elog(DEBUG2, "InstrStopNode called without rusage start");
+ 		}
+ 
+ 		getrusage(RUSAGE_SELF, &rusage);
+ 		ResourceUsageAccumDiff(&instr->rusage,
+ 							   &rusage,
+ 							   &instr->rusage_start);
+ 		INSTR_TIME_SET_ZERO_TV(instr->rusage_start.ru_utime);
+ 		INSTR_TIME_SET_ZERO_TV(instr->rusage_start.ru_stime);
+ 	}
+ 
  	/* Is this the first tuple of this cycle? */
  	if (!instr->running)
  	{
***************
*** 134,136 **** BufferUsageAccumDiff(BufferUsage *dst,
--- 165,186 ----
  	dst->temp_blks_read += add->temp_blks_read - sub->temp_blks_read;
  	dst->temp_blks_written += add->temp_blks_written - sub->temp_blks_written;
  }
+ 
+ static void
+ ResourceUsageAccumDiff(struct rusage *dst,
+ 					   const struct rusage *add,
+ 					   const struct rusage *sub)
+ {
+ 	INSTR_TIME_ACCUM_DIFF_TV(dst->ru_utime, add->ru_utime, sub->ru_utime);
+ 	INSTR_TIME_ACCUM_DIFF_TV(dst->ru_stime, add->ru_stime, sub->ru_stime);
+ 	dst->ru_minflt   += add->ru_minflt   - sub->ru_minflt;
+ 	dst->ru_majflt   += add->ru_majflt   - sub->ru_majflt;
+ 	dst->ru_nswap    += add->ru_nswap    - sub->ru_nswap;
+ 	dst->ru_inblock  += add->ru_inblock  - sub->ru_inblock;
+ 	dst->ru_oublock  += add->ru_oublock  - sub->ru_oublock;
+ 	dst->ru_msgsnd   += add->ru_msgsnd   - sub->ru_msgsnd;
+ 	dst->ru_msgrcv   += add->ru_msgrcv   - sub->ru_msgrcv;
+ 	dst->ru_nsignals += add->ru_nsignals - sub->ru_nsignals;
+ 	dst->ru_nvcsw    += add->ru_nvcsw    - sub->ru_nvcsw;
+ 	dst->ru_nivcsw   += add->ru_nivcsw   - sub->ru_nivcsw;
+ }
*** a/src/include/commands/explain.h
--- b/src/include/commands/explain.h
***************
*** 31,36 **** typedef struct ExplainState
--- 31,37 ----
  	bool		analyze;		/* print actual times */
  	bool		costs;			/* print costs */
  	bool		buffers;		/* print buffer usage */
+ 	bool		rusage;			/* print resource usage (from getrusage) */
  	ExplainFormat format;		/* output format */
  	/* other states */
  	PlannedStmt *pstmt;			/* top of plan */
*** a/src/include/executor/instrument.h
--- b/src/include/executor/instrument.h
***************
*** 15,20 ****
--- 15,22 ----
  
  #include "portability/instr_time.h"
  
+ #include <sys/time.h>
+ #include <sys/resource.h>
  
  typedef struct BufferUsage
  {
***************
*** 32,37 **** typedef enum InstrumentOption
--- 34,40 ----
  {
  	INSTRUMENT_TIMER = 1 << 0,	/* needs timer */
  	INSTRUMENT_BUFFERS = 1 << 1,	/* needs buffer usage */
+ 	INSTRUMENT_RUSAGE = 1 << 2,	/* needs resource usage (getrusage) */
  	INSTRUMENT_ALL = 0x7FFFFFFF
  } InstrumentOption;
  
***************
*** 40,56 **** typedef struct Instrumentation
--- 43,62 ----
  	/* Info about current plan cycle: */
  	bool		running;		/* TRUE if we've completed first tuple */
  	bool		needs_bufusage; /* TRUE if we need buffer usage */
+ 	bool		needs_rusage;   /* TRUE if we need resource usage */
  	instr_time	starttime;		/* Start time of current iteration of node */
  	instr_time	counter;		/* Accumulated runtime for this node */
  	double		firsttuple;		/* Time for first tuple of this cycle */
  	double		tuplecount;		/* Tuples emitted so far this cycle */
  	BufferUsage bufusage_start; /* Buffer usage at start */
+ 	struct rusage rusage_start; /* Resource usage at start (from getrusage) */
  	/* Accumulated statistics across all completed cycles: */
  	double		startup;		/* Total startup time (in seconds) */
  	double		total;			/* Total total time (in seconds) */
  	double		ntuples;		/* Total tuples produced */
  	double		nloops;			/* # of run cycles for this node */
  	BufferUsage bufusage;		/* Total buffer usage */
+ 	struct rusage rusage;		/* Total resource usage (from getrusage) */ 
  } Instrumentation;
  
  extern PGDLLIMPORT BufferUsage pgBufferUsage;
*** a/src/include/portability/instr_time.h
--- b/src/include/portability/instr_time.h
***************
*** 9,14 ****
--- 9,18 ----
   * instead.  These macros also give some breathing room to use other
   * high-precision-timing APIs on yet other platforms.
   *
+  * It exports its macros for manipulating struct timeval with names
+  * ending in _TV. This is for the benefit of other routines that have
+  * to manipulate struct timevals (such as those in getrusage)
+ 
   * The basic data type is instr_time, which all callers should treat as an
   * opaque typedef.	instr_time can store either an absolute time (of
   * unspecified reference time) or an interval.	The operations provided
***************
*** 52,70 ****
  #ifndef INSTR_TIME_H
  #define INSTR_TIME_H
  
- #ifndef WIN32
- 
  #include <sys/time.h>
  
  typedef struct timeval instr_time;
  
! #define INSTR_TIME_IS_ZERO(t)	((t).tv_usec == 0 && (t).tv_sec == 0)
  
! #define INSTR_TIME_SET_ZERO(t)	((t).tv_sec = 0, (t).tv_usec = 0)
  
! #define INSTR_TIME_SET_CURRENT(t)	gettimeofday(&(t), NULL)
  
! #define INSTR_TIME_ADD(x,y) \
  	do { \
  		(x).tv_sec += (y).tv_sec; \
  		(x).tv_usec += (y).tv_usec; \
--- 56,72 ----
  #ifndef INSTR_TIME_H
  #define INSTR_TIME_H
  
  #include <sys/time.h>
  
  typedef struct timeval instr_time;
  
! #define INSTR_TIME_IS_ZERO_TV(t)	((t).tv_usec == 0 && (t).tv_sec == 0)
  
! #define INSTR_TIME_SET_ZERO_TV(t)	((t).tv_sec = 0, (t).tv_usec = 0)
  
! #define INSTR_TIME_SET_CURRENT_TV(t)	gettimeofday(&(t), NULL)
  
! #define INSTR_TIME_ADD_TV(x,y) \
  	do { \
  		(x).tv_sec += (y).tv_sec; \
  		(x).tv_usec += (y).tv_usec; \
***************
*** 76,82 **** typedef struct timeval instr_time;
  		} \
  	} while (0)
  
! #define INSTR_TIME_SUBTRACT(x,y) \
  	do { \
  		(x).tv_sec -= (y).tv_sec; \
  		(x).tv_usec -= (y).tv_usec; \
--- 78,84 ----
  		} \
  	} while (0)
  
! #define INSTR_TIME_SUBTRACT_TV(x,y) \
  	do { \
  		(x).tv_sec -= (y).tv_sec; \
  		(x).tv_usec -= (y).tv_usec; \
***************
*** 88,94 **** typedef struct timeval instr_time;
  		} \
  	} while (0)
  
! #define INSTR_TIME_ACCUM_DIFF(x,y,z) \
  	do { \
  		(x).tv_sec += (y).tv_sec - (z).tv_sec; \
  		(x).tv_usec += (y).tv_usec - (z).tv_usec; \
--- 90,96 ----
  		} \
  	} while (0)
  
! #define INSTR_TIME_ACCUM_DIFF_TV(x,y,z) \
  	do { \
  		(x).tv_sec += (y).tv_sec - (z).tv_sec; \
  		(x).tv_usec += (y).tv_usec - (z).tv_usec; \
***************
*** 105,119 **** typedef struct timeval instr_time;
  		} \
  	} while (0)
  
! #define INSTR_TIME_GET_DOUBLE(t) \
  	(((double) (t).tv_sec) + ((double) (t).tv_usec) / 1000000.0)
  
! #define INSTR_TIME_GET_MILLISEC(t) \
  	(((double) (t).tv_sec * 1000.0) + ((double) (t).tv_usec) / 1000.0)
  
! #define INSTR_TIME_GET_MICROSEC(t) \
  	(((uint64) (t).tv_sec * (uint64) 1000000) + (uint64) (t).tv_usec)
! #else							/* WIN32 */
  
  typedef LARGE_INTEGER instr_time;
  
--- 107,134 ----
  		} \
  	} while (0)
  
! #define INSTR_TIME_GET_DOUBLE_TV(t) \
  	(((double) (t).tv_sec) + ((double) (t).tv_usec) / 1000000.0)
  
! #define INSTR_TIME_GET_MILLISEC_TV(t) \
  	(((double) (t).tv_sec * 1000.0) + ((double) (t).tv_usec) / 1000.0)
  
! #define INSTR_TIME_GET_MICROSEC_TV(t) \
  	(((uint64) (t).tv_sec * (uint64) 1000000) + (uint64) (t).tv_usec)
! 
! #ifndef WIN32
! 
! #define INSTR_TIME_IS_ZERO(t) INSTR_TIME_IS_ZERO_TV(t)
! #define INSTR_TIME_SET_ZERO(t) INSTR_TIME_SET_ZERO_TV(t)
! #define INSTR_TIME_SET_CURRENT(t) INSTR_TIME_SET_CURRENT_TV(t)
! #define INSTR_TIME_ADD(x,y) INSTR_TIME_ADD_TV(x,y)
! #define INSTR_TIME_SUBTRACT(x,y) INSTR_TIME_SUBTRACT_TV(x,y)
! #define INSTR_TIME_ACCUM_DIFF(x,y,z) INSTR_TIME_ACCUM_DIFF_TV(x,y,z)
! #define INSTR_TIME_GET_DOUBLE(t) INSTR_TIME_GET_DOUBLE_TV(t)
! #define INSTR_TIME_GET_MILLISEC(t) INSTR_TIME_GET_MILLISEC_TV(t)
! #define INSTR_TIME_GET_MICROSEC(t) INSTR_TIME_GET_MICROSEC_TV(t)
! 
! #else
  
  typedef LARGE_INTEGER instr_time;
  
*** a/src/include/rusagestub.h
--- b/src/include/rusagestub.h
***************
*** 27,32 **** struct rusage
--- 27,46 ----
  {
  	struct timeval ru_utime;	/* user time used */
  	struct timeval ru_stime;	/* system time used */
+ 	long   ru_maxrss;           /* maximum resident set size */
+ 	long   ru_ixrss;            /* integral shared memory size */
+ 	long   ru_idrss;            /* integral unshared data size */
+ 	long   ru_isrss;            /* integral unshared stack size */
+ 	long   ru_minflt;           /* page reclaims */
+ 	long   ru_majflt;           /* page faults */
+ 	long   ru_nswap;            /* swaps */
+ 	long   ru_inblock;          /* block input operations */
+ 	long   ru_oublock;          /* block output operations */
+ 	long   ru_msgsnd;           /* messages sent */
+ 	long   ru_msgrcv;           /* messages received */
+ 	long   ru_nsignals;         /* signals received */
+ 	long   ru_nvcsw;            /* voluntary context switches */
+ 	long   ru_nivcsw;           /* involuntary context switches */
  };
  
  extern int	getrusage(int who, struct rusage * rusage);
*** a/src/port/getrusage.c
--- b/src/port/getrusage.c
***************
*** 40,45 **** getrusage(int who, struct rusage * rusage)
--- 40,46 ----
  	FILETIME	kerneltime;
  	FILETIME	usertime;
  	ULARGE_INTEGER li;
+ 	IOCOUNTERS iocounters;
  
  	if (who != RUSAGE_SELF)
  	{
***************
*** 61,66 **** getrusage(int who, struct rusage * rusage)
--- 62,73 ----
  		return -1;
  	}
  
+ 	if (GetProcessIoCounters(GetCurrentProcess(), &iocounters) == 0)
+ 	{
+ 		_dosmaperr(GetLastError());
+ 		return -1;
+ 	}
+ 
  	/* Convert FILETIMEs (0.1 us) to struct timeval */
  	memcpy(&li, &kerneltime, sizeof(FILETIME));
  	li.QuadPart /= 10L;			/* Convert to microseconds */
***************
*** 71,76 **** getrusage(int who, struct rusage * rusage)
--- 78,87 ----
  	li.QuadPart /= 10L;			/* Convert to microseconds */
  	rusage->ru_utime.tv_sec = li.QuadPart / 1000000L;
  	rusage->ru_utime.tv_usec = li.QuadPart % 1000000L;
+ 
+ 	rusage->ru_inblock = iocounters.ReadTransferCount/512;
+ 	rusage->ru_oublock = iocounters.WriteTransferCount/512;
+ 
  #else							/* all but WIN32 */
  
  	struct tms	tms;
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to