Hi Thomas,

you are right of course. Thanks!

I have attached a new version of the patch that supports the syntax like
suggested. The previous patch was insonsistent in style indeed.

explain (analyze, serialize)

and

explain (analyze, serialize binary)

That doesn't make too much of a difference for most scenarios I am
certain. However the the seralize option itself does. Mostly because it
performs the detoasting and that was a trap for me in the past with just
plain analyze.


Eg this scenario really is not too far fetched in a world where people
have large JSONB values.


db1=# create table test(id bigint, val text);

db1=# insert into test(val) select string_agg(s::text, ',') from (select
generate_series(1, 10_000_000) as s) as a1;

now we have a cell that has roughly 80Mb in it. A large detoasting that
will happen in reallife but in explain(analyze).

and then...

db1=# explain (analyze) select * from test;
                                            QUERY PLAN
---------------------------------------------------------------------------------------------------
 Seq Scan on test  (cost=0.00..22.00 rows=1200 width=40) (actual
time=0.018..0.020 rows=1 loops=1)
 Planning Time: 0.085 ms
 Execution Time: 0.044 ms
(3 rows)

db1=# explain (analyze, serialize) select * from test;
                                            QUERY PLAN
---------------------------------------------------------------------------------------------------
 Seq Scan on test  (cost=0.00..22.00 rows=1200 width=40) (actual
time=0.023..0.027 rows=1 loops=1)
 Planning Time: 0.077 ms
 Execution Time: 303.281 ms
 Serialized Bytes: 78888953 Bytes. Mode Text. Bandwidth 248.068 MB/sec
(4 rows)

db1=#

So the explain(analyze) does not process the ~80 MB in 0.044ms in any
way of course.

Actually I could print the serialized bytes using 1. grouping-separators
(eg 78_888_953) or 2. in the way pg_size_pretty does it.

If doing it the pg_size_pretty way I am uncertain if it would be ok to
query the actual pg_size_pretty function via its (certainly frozen) oid
of 3166 and do OidFunctionCall1(3166...) to invoke it. Otherwise I'd say
it would be nice if the code from that function would be made available
as a utility function for all c-code.  Any suggestions on this topic?

Regards,

/Stepan


On 02.11.23 18:49, Tomas Vondra wrote:
Hi,

On 9/15/23 22:09, stepan rutz wrote:
Hi,

please see a revised version yesterday's mail. The patch attached now
provides the following:

EXPLAIN(ANALYZE,SERIALIZE)

and

EXPLAIN(ANALYZE,SERIALIZEBINARY)

I haven't looked at the patch in detail yet, but this option name looks
a bit strange/inconsistent. Either it should be SERIALIZE_BINARY (to
match other multi-word options), or maybe there should be just SERIALIZE
with a parameter to determine text/binary (like FORMAT, for example).

So we'd do either

     EXPLAIN (SERIALIZE)
     EXPLAIN (SERIALIZE TEXT)

to get serialization to text (which I guess 99% of people will do), or

     EXPLAIN (SERIALIZE BINARY)

to get binary.


regards

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index f1d71bc54e..bb75ac0b73 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -26,6 +26,7 @@
 #include "nodes/nodeFuncs.h"
 #include "parser/analyze.h"
 #include "parser/parsetree.h"
+#include "mb/pg_wchar.h"
 #include "rewrite/rewriteHandler.h"
 #include "storage/bufmgr.h"
 #include "tcop/tcopprot.h"
@@ -33,6 +34,7 @@
 #include "utils/guc_tables.h"
 #include "utils/json.h"
 #include "utils/lsyscache.h"
+#include "utils/numeric.h"
 #include "utils/rel.h"
 #include "utils/ruleutils.h"
 #include "utils/snapmgr.h"
@@ -47,7 +49,6 @@ ExplainOneQuery_hook_type ExplainOneQuery_hook = NULL;
 /* Hook for plugins to get control in explain_get_index_name() */
 explain_get_index_name_hook_type explain_get_index_name_hook = NULL;
 
-
 /* OR-able flags for ExplainXMLTag() */
 #define X_OPENING 0
 #define X_CLOSING 1
@@ -154,7 +155,8 @@ static void ExplainJSONLineEnding(ExplainState *es);
 static void ExplainYAMLLineStarting(ExplainState *es);
 static void escape_yaml(StringInfo buf, const char *str);
 
-
+static DestReceiver *CreateSerializeDestReceiver(int16 format);
+static uint64 GetNumberOfSerializedBytes(DestReceiver *dest);
 
 /*
  * ExplainQuery -
@@ -192,6 +194,23 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 			es->settings = defGetBoolean(opt);
 		else if (strcmp(opt->defname, "generic_plan") == 0)
 			es->generic = defGetBoolean(opt);
+		else if (strcmp(opt->defname, "serialize") == 0)
+		{
+			es->serialize = true;
+			/* optionally check for BINARY as argument */
+			if (opt->arg)
+			{
+				char *p = defGetString(opt);
+				if (strcmp(p, "binary") == 0)
+					es->serialize = es->serializebinary = true;
+				else
+					ereport(ERROR,
+						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+						 errmsg("unrecognized value for EXPLAIN option \"%s\": \"%s\"",
+								opt->defname, p),
+						 parser_errposition(pstate, opt->location)));
+			}
+		}
 		else if (strcmp(opt->defname, "timing") == 0)
 		{
 			timing_set = true;
@@ -244,6 +263,12 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("EXPLAIN option TIMING requires ANALYZE")));
 
+	/* check that serialize is used with EXPLAIN ANALYZE */
+	if (es->serialize && !es->analyze)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("EXPLAIN option SERIALIZE requires ANALYZE")));
+
 	/* check that GENERIC_PLAN is not used with EXPLAIN ANALYZE */
 	if (es->generic && es->analyze)
 		ereport(ERROR,
@@ -535,6 +560,7 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	double		totaltime = 0;
 	int			eflags;
 	int			instrument_option = 0;
+	int64		bytesSend = 0;
 
 	Assert(plannedstmt->commandType != CMD_UTILITY);
 
@@ -565,9 +591,15 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	/*
 	 * Normally we discard the query's output, but if explaining CREATE TABLE
 	 * AS, we'd better use the appropriate tuple receiver.
+	 * Also if we choose to serialize during explain, we need to send the
+	 * tuples to a dest-receiver that performs serializing
+	 * into the protocol receiving format. this implicitly also detoasts
+	 * values.
 	 */
 	if (into)
 		dest = CreateIntoRelDestReceiver(into);
+	else if (es->analyze && es->serialize)
+		dest = CreateSerializeDestReceiver(es->serializebinary ? 1 : 0);
 	else
 		dest = None_Receiver;
 
@@ -606,6 +638,15 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 		/* run cleanup too */
 		ExecutorFinish(queryDesc);
 
+		if (es->serialize)
+		{
+			bytesSend = GetNumberOfSerializedBytes(dest);
+		}
+
+
+		/* call the dest receiver's destroy method even during explain */
+		dest->rDestroy(dest);
+
 		/* We can't run ExecutorEnd 'till we're done printing the stats... */
 		totaltime += elapsed_time(&starttime);
 	}
@@ -671,6 +712,18 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 		ExplainPropertyFloat("Execution Time", "ms", 1000.0 * totaltime, 3,
 							 es);
 
+	/* Print info about serialization of data */
+	if (es->summary && es->analyze && es->serialize)
+	{
+		char *outputbuf;
+		outputbuf = psprintf("%ld Bytes. Mode %s. Bandwidth %.3f MB/sec",
+			bytesSend,
+			es->serializebinary ? "Binary" : "Text",
+			bytesSend / 1024.0 / 1024.0 / totaltime);
+		ExplainPropertyText("Serialized Bytes", outputbuf, es);
+		pfree(outputbuf);
+	}
+
 	ExplainCloseGroup("Query", NULL, true, es);
 }
 
@@ -5082,3 +5135,238 @@ escape_yaml(StringInfo buf, const char *str)
 {
 	escape_json(buf, str);
 }
+
+
+/* Serializing DestReceiver functions
+ *
+ * EXPLAIN (ANALYZE)  sometimes gives results that unrealistic.
+ * During EXPLAIN(ANALYZE) there is no detoasting of values.
+ * That means you get no idea what your actual query time is going
+ * to be, nor what the actual overhead of detoasting is.
+ *
+ * EXPLAIN (ANAYLZE, SERIALIZE) will perform detoasting implicitly and
+ * will also do count the number of bytes being send to the client but
+ * will not actually send them. SERIALIZE uses text format for all
+ * attributes.
+ *
+ * EXPLAIN (ANAYLZE, SERIALIZE BINARY) serializes using binary format.
+ */
+
+/* an attribute info cached for each column */
+typedef struct
+{								/* Per-attribute information */
+	Oid			typoutput;		/* Oid for the type's text output fn */
+	Oid			typsend;		/* Oid for the type's binary output fn */
+	bool		typisvarlena;	/* is it varlena (ie possibly toastable)? */
+	int16		format;			/* format code for this column */
+	FmgrInfo	finfo;			/* Precomputed call info for output fn */
+} SerializeAttrInfo;
+
+typedef struct
+{
+	/* receiver for the tuples, that just serializes */
+	DestReceiver		destRecevier;
+	MemoryContext		memoryContext;
+	MemoryContext		oldMemoryContext;	/* restore to old when done */
+	int16				format;				/* text of binary */
+	TupleDesc			attrinfo;
+	int					nattrs;
+	uint64				bytesSend;			/* Num of bytes over the wire */
+	SerializeAttrInfo	*infos;				/* Cached info about each attr */
+} SerializeDestReceiver;
+
+/*
+ * Get the lookup info that the row-callback of the receiver needs. this code
+ * is similar to the code from printup.c except that it doesn't do any actual
+ * output.
+ */
+static void
+serialize_prepare_info(SerializeDestReceiver *receiver, TupleDesc typeinfo,
+	int nattrs)
+{
+	int i;
+
+	/* get rid of any old data */
+	if (receiver->infos)
+		pfree(receiver->infos);
+	receiver->infos = NULL;
+
+	receiver->attrinfo = typeinfo;
+	receiver->nattrs = nattrs;
+	if (nattrs <= 0)
+		return;
+
+	receiver->infos = (SerializeAttrInfo *) palloc0(
+		nattrs * sizeof(SerializeAttrInfo));
+	for (i = 0; i < nattrs; i++)
+	{
+		SerializeAttrInfo *info = &(receiver->infos[i]);
+		Form_pg_attribute attr = TupleDescAttr(typeinfo, i);
+		info->format = receiver->format;
+		if (info->format == 0)
+		{
+			getTypeOutputInfo(attr->atttypid,
+							  &info->typoutput,
+							  &info->typisvarlena);
+			fmgr_info(info->typoutput, &info->finfo);
+		}
+		else if (info->format == 1)
+		{
+			getTypeBinaryOutputInfo(attr->atttypid,
+									&info->typsend,
+									&info->typisvarlena);
+			fmgr_info(info->typsend, &info->finfo);
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+					 errmsg("unsupported format code: %d", info->format)));
+	}
+}
+
+
+
+/* this method receives the tuples/records during explain (analyze, serialize)
+ * and serializes them.
+ * this method is otherwise not called and its purpose is to serialize to
+ * make analyze optionally more realistic. serialization includes detoasting
+ * and invoking the type's output (or send) function, which is what happens
+ * when you actually run the query. both steps cost time and thus
+ * analyze can be very misleading if those two steps are omitted.
+ */
+static bool
+serializeAnalyzeReceive(TupleTableSlot *slot, DestReceiver *self)
+{
+	int i;
+	TupleDesc tupdesc;
+	SerializeDestReceiver *receiver = (SerializeDestReceiver*) self;
+
+	tupdesc  = slot->tts_tupleDescriptor;
+	/* Cache attribute infos and function oid if outdated */
+	if (receiver->attrinfo != tupdesc || receiver->nattrs != tupdesc->natts)
+	{
+		serialize_prepare_info(receiver, tupdesc, tupdesc->natts);
+	}
+
+	/* Fill all of the slot's attributes, we can now use slot->tts_values
+	 * and its tts_isnull array which should be long enough even if added
+	 * a null-column to the table */
+	slot_getallattrs(slot);
+
+	receiver->oldMemoryContext = MemoryContextSwitchTo(
+		receiver->memoryContext);
+
+	/* Iterate over all attributes of the tuple and invoke the output func
+	 * (or send function in case of a binary format). completely ignore the
+	 * result. MemoryContext is free'd via reset at the end of the this
+	 * per-tuple callback anyhow.
+	 * NOTE: when invoking the output function detoasting is implicitly
+	 * performed when needed.
+	 */
+
+	receiver->bytesSend += 3; /* protocol message type and attribute-count */
+	for (i = 0; i < tupdesc->natts; i++)
+	{
+		SerializeAttrInfo *info = &(receiver->infos[i]);
+		Datum attr = slot->tts_values[i];
+		if (slot->tts_isnull[i])
+		{
+			receiver->bytesSend += 4; /* null is send as -1 length */
+		}
+		else
+		{
+			if (info->format == 0)
+			{
+				/* Text output, ignore the result but count it */
+				char* output = OutputFunctionCall(&info->finfo, attr);
+				size_t outputlen = strlen(output);
+				/* convert from server encoding to client encoding if needed */
+				char *converted = pg_server_to_client(output, outputlen);
+				if (converted != output)
+				{
+					/* converted encoding, get new length */
+					outputlen = strlen(converted);
+					pfree(converted);
+				}
+				/* append length that would have gone over the wire, 4
+				 * the number of preceding bytes that contain the length */
+				receiver->bytesSend += 4 + outputlen;
+			}
+			else
+			{
+				/* Binary output, ignore the result but count it
+				 * length is included in varsize */
+				bytea* bytes = SendFunctionCall(&info->finfo, attr);
+				uint64 n = VARSIZE(bytes);
+				receiver->bytesSend += n;
+			}
+		}
+	}
+	MemoryContextSwitchTo(receiver->oldMemoryContext);
+	MemoryContextReset(receiver->memoryContext);
+	return true;
+}
+
+static void
+serializeAnalyzeStartup(DestReceiver *self, int operation, TupleDesc typeinfo)
+{
+	SerializeDestReceiver *receiver = (SerializeDestReceiver*) self;
+	int i;
+	/* memory context for our work */
+	receiver->memoryContext = AllocSetContextCreate(CurrentMemoryContext,
+		"SerializeTupleReceive", ALLOCSET_DEFAULT_SIZES);
+
+	/* account for the attribute headers for send bytes */
+	receiver->bytesSend += 3; /* protocol message type and attribute-count */
+	for (i = 0; i < typeinfo->natts; ++i)
+	{
+		Form_pg_attribute att = TupleDescAttr(typeinfo, i);
+		char* name = NameStr(att->attname);
+		size_t namelen = strlen(name);
+		/* convert from server encoding to client encoding if needed */
+		char *converted = pg_server_to_client(name, namelen);
+		if (converted != name)
+		{
+			namelen = strlen(converted);
+			pfree(converted);
+		}
+		/* see printtup.h why we add 18 bytes here. These are the infos
+		 * needed for each attribute plus the attributes's name */
+		receiver->bytesSend += namelen + 1 + 18;
+	}
+}
+
+static void
+serializeAnalyzeCleanup(DestReceiver *self)
+{
+	SerializeDestReceiver *receiver = (SerializeDestReceiver*) self;
+	MemoryContextDelete(receiver->memoryContext);
+}
+
+static void
+serializeAnalyzeDestroy(DestReceiver *self)
+{
+	pfree(self);
+}
+
+/* format is either 0 (text) or 1 (binary) */
+static DestReceiver *CreateSerializeDestReceiver(int16 format)
+{
+	/* Attention must palloc0 (like calloc) here.
+	 * infos and nattrs are expected to be 0 or NULL initially */
+	SerializeDestReceiver *self = (SerializeDestReceiver*) palloc0(
+		sizeof(SerializeDestReceiver));
+	self->destRecevier.receiveSlot = serializeAnalyzeReceive;
+	self->destRecevier.rStartup = serializeAnalyzeStartup;
+	self->destRecevier.rShutdown = serializeAnalyzeCleanup;
+	self->destRecevier.rDestroy = serializeAnalyzeDestroy;
+	self->destRecevier.mydest = DestNone;
+	self->format = format;
+	return (DestReceiver *) self;
+}
+
+static uint64 GetNumberOfSerializedBytes(DestReceiver *dest)
+{
+	SerializeDestReceiver *receiver = (SerializeDestReceiver*) dest;
+	return receiver->bytesSend;
+}
diff --git a/src/include/commands/explain.h b/src/include/commands/explain.h
index f9525fb572..dfa1b94cea 100644
--- a/src/include/commands/explain.h
+++ b/src/include/commands/explain.h
@@ -47,6 +47,16 @@ typedef struct ExplainState
 	bool		summary;		/* print total planning and execution timing */
 	bool		settings;		/* print modified settings */
 	bool		generic;		/* generate a generic plan */
+	bool		serialize;      /* serialize all values, only applicable
+								 * when "analyze" is active. serializing
+								 * invokes the type's output function in
+								 * the same way printtup.c does then tuples
+								 * are send to the client. this includes
+								 * detoasting of toasted values and makes
+								 * EXPLAIN(ANALYZE,SERIALIZE) return realistic
+								 * (longer runtimes */
+	bool		serializebinary;/* serialize in binary format */
+
 	ExplainFormat format;		/* output format */
 	/* state for output formatting --- not reset for each new plan tree */
 	int			indent;			/* current indentation level */
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 809655e16e..3aed4e9356 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -563,3 +563,48 @@ select explain_filter('explain (verbose) select * from int8_tbl i8');
  Query Identifier: N
 (3 rows)
 
+-- Test that SERIALIZE is accepted as a parameter to explain
+-- timings are filtered out by explain_filter
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (analyze,serialize) select * from test_serialize');
+                                          explain_filter                                          
+--------------------------------------------------------------------------------------------------
+ Seq Scan on test_serialize  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+ Serialized Bytes: N Bytes. Mode Text. Bandwidth N.N MB/sec
+(4 rows)
+
+drop table test_serialize;
+-- Test that SERIALIZE BINARY is accepted as a parameter to explain
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (analyze,serialize binary) select * from test_serialize');
+                                          explain_filter                                          
+--------------------------------------------------------------------------------------------------
+ Seq Scan on test_serialize  (cost=N.N..N.N rows=N width=N) (actual time=N.N..N.N rows=N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+ Serialized Bytes: N Bytes. Mode Binary. Bandwidth N.N MB/sec
+(4 rows)
+
+drop table test_serialize;
+-- Test that _SERIALIZE invalidparameter_ is not accepted as a parameter to explain
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (analyze,serialize invalidparameter) select * from test_serialize');
+ERROR:  unrecognized value for EXPLAIN option "serialize": "invalidparameter"
+LINE 1: select explain_filter('explain (analyze,serialize invalidpar...
+                         ^
+CONTEXT:  PL/pgSQL function explain_filter(text) line 5 at FOR over EXECUTE statement
+drop table test_serialize;
+-- Test SERIALIZE is _not_ accepted as a parameter to explain unless ANALYZE is specified
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (serialize) select * from test_serialize');
+ERROR:  EXPLAIN option SERIALIZE requires ANALYZE
+CONTEXT:  PL/pgSQL function explain_filter(text) line 5 at FOR over EXECUTE statement
+drop table test_serialize;
+-- Test SERIALIZEBINARY is _not_ accepted as a parameter to explain unless ANALYZE is specified
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (serialize binary) select * from test_serialize');
+ERROR:  EXPLAIN option SERIALIZE requires ANALYZE
+CONTEXT:  PL/pgSQL function explain_filter(text) line 5 at FOR over EXECUTE statement
+drop table test_serialize;
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index b6b7beab27..4dc10066b5 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -154,3 +154,29 @@ select explain_filter('explain (verbose) select * from t1 where pg_temp.mysin(f1
 -- Test compute_query_id
 set compute_query_id = on;
 select explain_filter('explain (verbose) select * from int8_tbl i8');
+
+-- Test that SERIALIZE is accepted as a parameter to explain
+-- timings are filtered out by explain_filter
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (analyze,serialize) select * from test_serialize');
+drop table test_serialize;
+
+-- Test that SERIALIZE BINARY is accepted as a parameter to explain
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (analyze,serialize binary) select * from test_serialize');
+drop table test_serialize;
+
+-- Test that _SERIALIZE invalidparameter_ is not accepted as a parameter to explain
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (analyze,serialize invalidparameter) select * from test_serialize');
+drop table test_serialize;
+
+-- Test SERIALIZE is _not_ accepted as a parameter to explain unless ANALYZE is specified
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (serialize) select * from test_serialize');
+drop table test_serialize;
+
+-- Test SERIALIZEBINARY is _not_ accepted as a parameter to explain unless ANALYZE is specified
+create table test_serialize(id bigserial, val text);
+select explain_filter('explain (serialize binary) select * from test_serialize');
+drop table test_serialize;

Reply via email to