Per recent discussion on pgsql-performance, and per discussion on
-hackers that it might not be too late for small patches after all,
here is a patch (as yet without documentation) which adds some
additional instrumentation to EXPLAIN for hashes: number of buckets,
number of batches, original number of batches, and peak memory
utilization.  Thoughts?

I was also thinking about the possibility of adding a new option
called "output" and making that control whether the "Output" line gets
printed.  It's kind of annoying to use EXPLAIN (ANALYZE, VERBOSE)
right now (and moreso with this patch) specifically because of that
line, which is quite... verbose.  If we're going to change it ever we
should do it for 9.0, since we've made a lot of other changes that
people will be adjusting for anyhow.

Also, do we want to change the schema URL?  The existing URL was
suggested by Peter but IIRC there was some thought that it might not
be the best choice.

http://www.postgresql.org/2009/explain

...Robert
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 18ddeec..fca9ba4 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -20,6 +20,7 @@
 #include "commands/explain.h"
 #include "commands/prepare.h"
 #include "commands/trigger.h"
+#include "executor/hashjoin.h"
 #include "executor/instrument.h"
 #include "optimizer/clauses.h"
 #include "optimizer/planner.h"
@@ -67,6 +68,7 @@ static void show_upper_qual(List *qual, const char *qlabel, Plan *plan,
 				ExplainState *es);
 static void show_sort_keys(Plan *sortplan, ExplainState *es);
 static void show_sort_info(SortState *sortstate, ExplainState *es);
+static void show_hash_info(HashState *hashstate, ExplainState *es);
 static const char *explain_get_index_name(Oid indexId);
 static void ExplainScanTarget(Scan *plan, ExplainState *es);
 static void ExplainMemberNodes(List *plans, PlanState **planstate,
@@ -1052,6 +1054,9 @@ ExplainNode(Plan *plan, PlanState *planstate,
 							"One-Time Filter", plan, es);
 			show_upper_qual(plan->qual, "Filter", plan, es);
 			break;
+		case T_Hash:
+			show_hash_info((HashState *) planstate, es);
+			break;
 		default:
 			break;
 	}
@@ -1405,6 +1410,37 @@ show_sort_info(SortState *sortstate, ExplainState *es)
 }
 
 /*
+ * Show information on hash buckets/batches.
+ */
+static void
+show_hash_info(HashState *hashstate, ExplainState *es)
+{
+	HashJoinTable hashtable;
+
+	Assert(IsA(hashstate, HashState));
+	hashtable = hashstate->hashtable;
+
+	if (es->verbose && hashtable)
+	{
+		long spacePeakKb = (hashtable->spacePeak + 1023) / 1024;
+		ExplainPropertyLong("Hash Buckets", hashtable->nbuckets, es);
+		ExplainPropertyLong("Hash Batches", hashtable->nbatch, es);
+		ExplainPropertyLong("Original Hash Batches",
+			hashtable->nbatch_original, es);
+		if (es->format == EXPLAIN_FORMAT_TEXT)
+		{
+			appendStringInfoSpaces(es->str, es->indent * 2);
+			appendStringInfo(es->str, "Peak Memory Usage: %ldkB\n",
+							 spacePeakKb);
+		}
+		else
+		{
+			ExplainPropertyLong("Peak Memory Usage", spacePeakKb, es);
+		}
+	}
+}
+
+/*
  * Fetch the name of an index in an EXPLAIN
  *
  * We allow plugins to get control here so that plans involving hypothetical
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index ef55b20..7ca387b 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -287,6 +287,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators)
 	hashtable->innerBatchFile = NULL;
 	hashtable->outerBatchFile = NULL;
 	hashtable->spaceUsed = 0;
+	hashtable->spacePeak = 0;
 	hashtable->spaceAllowed = work_mem * 1024L;
 	hashtable->spaceUsedSkew = 0;
 	hashtable->spaceAllowedSkew =
@@ -719,6 +720,8 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		hashTuple->next = hashtable->buckets[bucketno];
 		hashtable->buckets[bucketno] = hashTuple;
 		hashtable->spaceUsed += hashTupleSize;
+		if (hashtable->spaceUsed > hashtable->spacePeak)
+			hashtable->spacePeak = hashtable->spaceUsed;
 		if (hashtable->spaceUsed > hashtable->spaceAllowed)
 			ExecHashIncreaseNumBatches(hashtable);
 	}
@@ -1071,6 +1074,8 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
 			+ mcvsToUse * sizeof(int);
 		hashtable->spaceUsedSkew += nbuckets * sizeof(HashSkewBucket *)
 			+ mcvsToUse * sizeof(int);
+		if (hashtable->spaceUsed > hashtable->spacePeak)
+			hashtable->spacePeak = hashtable->spaceUsed;
 
 		/*
 		 * Create a skew bucket for each MCV hash value.
@@ -1119,6 +1124,8 @@ ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node, int mcvsToUse)
 			hashtable->nSkewBuckets++;
 			hashtable->spaceUsed += SKEW_BUCKET_OVERHEAD;
 			hashtable->spaceUsedSkew += SKEW_BUCKET_OVERHEAD;
+			if (hashtable->spaceUsed > hashtable->spacePeak)
+				hashtable->spacePeak = hashtable->spaceUsed;
 		}
 
 		free_attstatsslot(node->skewColType,
@@ -1205,6 +1212,8 @@ ExecHashSkewTableInsert(HashJoinTable hashtable,
 	/* Account for space used, and back off if we've used too much */
 	hashtable->spaceUsed += hashTupleSize;
 	hashtable->spaceUsedSkew += hashTupleSize;
+	if (hashtable->spaceUsed > hashtable->spacePeak)
+		hashtable->spacePeak = hashtable->spaceUsed;
 	while (hashtable->spaceUsedSkew > hashtable->spaceAllowedSkew)
 		ExecHashRemoveNextSkewBucket(hashtable);
 
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index 2ecea40..a1938dc 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -149,6 +149,7 @@ typedef struct HashJoinTableData
 
 	Size		spaceUsed;		/* memory space currently used by tuples */
 	Size		spaceAllowed;	/* upper limit for space used */
+	Size		spacePeak;		/* peak space used */
 	Size		spaceUsedSkew;	/* skew hash table's current space usage */
 	Size		spaceAllowedSkew;		/* upper limit for skew hashtable */
 
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to