Re: Feature improvement: can we add queryId for pg_catalog.pg_stat_activity view?

Julien Rouhaud Wed, 24 Mar 2021 08:20:44 -0700

On Wed, Mar 24, 2021 at 08:13:40AM -0400, Bruce Momjian wrote:
> On Wed, Mar 24, 2021 at 04:51:40PM +0800, Julien Rouhaud wrote:
> > > but if you do that it'd be better to avoid
> > > introducing a function with one name and renaming it in your next
> > > commit.
> > 
> > Oops, I apparently messed a fixup when working on it.  Bruce, should I take
> > care of that of do you want to?  I think you have some local modifications
> > already I'd rather not miss some changes.
> 
> I have no local modifications.  Please modify the patch I posted and
> repost your version, thanks.


Ok!  I used the last version of the patch you sent and addressed the following
comments from earlier messages in attached v20:

- copyright year to 2021
- s/has has been compute/has been compute/
- use the name CleanQuerytext in the first commit

I didn't change the position of queryid in pg_stat_get_activity(), as the
"real" order is actually define in system_views.sql when creating
pg_stat_activity view.  Adding the new fields at the end of
pg_stat_get_activity() helps to keep the C code simpler and less bug prone, so
I think it's best to continue this way.

I also used the previous commit message if that helps.

>From 5df95cb13c3505d654bd480c8978fe6f5eba00bb Mon Sep 17 00:00:00 2001
From: Bruce Momjian <[email protected]>
Date: Mon, 22 Mar 2021 17:43:22 -0400
Subject: [PATCH v20 1/3] Move pg_stat_statements query jumbling to core.

A new compute_query_id GUC is also added, to control whether a query identifier
should be computed by the core.  It's thefore now possible to disable core
queryid computation and use pg_stat_statements with a different algorithm to
compute the query identifier by using third-party module.

To ensure that a single source of query identifier can be used and is well
defined, modules that calculate a query identifier should throw an error if
compute_query_id is enabled or if a query idenfitier was already calculated.
---
 .../pg_stat_statements/pg_stat_statements.c   | 805 +----------------
 .../pg_stat_statements.conf                   |   1 +
 doc/src/sgml/config.sgml                      |  25 +
 doc/src/sgml/pgstatstatements.sgml            |  20 +-
 src/backend/parser/analyze.c                  |  14 +-
 src/backend/tcop/postgres.c                   |   6 +-
 src/backend/utils/misc/Makefile               |   1 +
 src/backend/utils/misc/guc.c                  |  10 +
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/backend/utils/misc/queryjumble.c          | 834 ++++++++++++++++++
 src/include/parser/analyze.h                  |   4 +-
 src/include/utils/guc.h                       |   1 +
 src/include/utils/queryjumble.h               |  58 ++
 13 files changed, 995 insertions(+), 785 deletions(-)
 create mode 100644 src/backend/utils/misc/queryjumble.c
 create mode 100644 src/include/utils/queryjumble.h

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 62cccbfa44..bd8c96728c 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -8,24 +8,9 @@
  * a shared hashtable.  (We track only as many distinct queries as will fit
  * in the designated amount of shared memory.)
  *
- * As of Postgres 9.2, this module normalizes query entries.  Normalization
- * is a process whereby similar queries, typically differing only in their
- * constants (though the exact rules are somewhat more subtle than that) are
- * recognized as equivalent, and are tracked as a single entry.  This is
- * particularly useful for non-prepared queries.
- *
- * Normalization is implemented by fingerprinting queries, selectively
- * serializing those fields of each query tree's nodes that are judged to be
- * essential to the query.  This is referred to as a query jumble.  This is
- * distinct from a regular serialization in that various extraneous
- * information is ignored as irrelevant or not essential to the query, such
- * as the collations of Vars and, most notably, the values of constants.
- *
- * This jumble is acquired at the end of parse analysis of each query, and
- * a 64-bit hash of it is stored into the query's Query.queryId field.
- * The server then copies this value around, making it available in plan
- * tree(s) generated from the query.  The executor can then use this value
- * to blame query costs on the proper queryId.
+ * Starting in Postgres 9.2, this module normalized query entries.  As of
+ * Postgres 14, the normalization is done by the core if compute_query_id is
+ * enabled, or optionally by third-party modules.
  *
  * To facilitate presenting entries to users, we create "representative" query
  * strings in which constants are replaced with parameter symbols ($n), to
@@ -114,8 +99,6 @@ static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
 #define USAGE_DEALLOC_PERCENT	5	/* free this % of entries at once */
 #define IS_STICKY(c)	((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
 
-#define JUMBLE_SIZE				1024	/* query serialization buffer size */
-
 /*
  * Extension version number, for supporting older extension versions' objects
  */
@@ -235,40 +218,6 @@ typedef struct pgssSharedState
 	pgssGlobalStats stats;		/* global statistics for pgss */
 } pgssSharedState;
 
-/*
- * Struct for tracking locations/lengths of constants during normalization
- */
-typedef struct pgssLocationLen
-{
-	int			location;		/* start offset in query text */
-	int			length;			/* length in bytes, or -1 to ignore */
-} pgssLocationLen;
-
-/*
- * Working state for computing a query jumble and producing a normalized
- * query string
- */
-typedef struct pgssJumbleState
-{
-	/* Jumble of current query tree */
-	unsigned char *jumble;
-
-	/* Number of bytes used in jumble[] */
-	Size		jumble_len;
-
-	/* Array of locations of constants that should be removed */
-	pgssLocationLen *clocations;
-
-	/* Allocated length of clocations array */
-	int			clocations_buf_size;
-
-	/* Current number of valid entries in clocations array */
-	int			clocations_count;
-
-	/* highest Param id we've seen, in order to start normalization correctly */
-	int			highest_extern_param_id;
-} pgssJumbleState;
-
 /*---- Local variables ----*/
 
 /* Current nesting depth of ExecutorRun+ProcessUtility calls */
@@ -342,7 +291,8 @@ PG_FUNCTION_INFO_V1(pg_stat_statements_info);
 
 static void pgss_shmem_startup(void);
 static void pgss_shmem_shutdown(int code, Datum arg);
-static void pgss_post_parse_analyze(ParseState *pstate, Query *query);
+static void pgss_post_parse_analyze(ParseState *pstate, Query *query,
+									JumbleState *jstate);
 static PlannedStmt *pgss_planner(Query *parse,
 								 const char *query_string,
 								 int cursorOptions,
@@ -364,7 +314,7 @@ static void pgss_store(const char *query, uint64 queryId,
 					   double total_time, uint64 rows,
 					   const BufferUsage *bufusage,
 					   const WalUsage *walusage,
-					   pgssJumbleState *jstate);
+					   JumbleState *jstate);
 static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
 										pgssVersion api_version,
 										bool showtext);
@@ -380,16 +330,9 @@ static char *qtext_fetch(Size query_offset, int query_len,
 static bool need_gc_qtexts(void);
 static void gc_qtexts(void);
 static void entry_reset(Oid userid, Oid dbid, uint64 queryid);
-static void AppendJumble(pgssJumbleState *jstate,
-						 const unsigned char *item, Size size);
-static void JumbleQuery(pgssJumbleState *jstate, Query *query);
-static void JumbleRangeTable(pgssJumbleState *jstate, List *rtable);
-static void JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks);
-static void JumbleExpr(pgssJumbleState *jstate, Node *node);
-static void RecordConstLocation(pgssJumbleState *jstate, int location);
-static char *generate_normalized_query(pgssJumbleState *jstate, const char *query,
+static char *generate_normalized_query(JumbleState *jstate, const char *query,
 									   int query_loc, int *query_len_p);
-static void fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
+static void fill_in_constant_lengths(JumbleState *jstate, const char *query,
 									 int query_loc);
 static int	comp_location(const void *a, const void *b);
 
@@ -851,15 +794,10 @@ error:
  * Post-parse-analysis hook: mark query with a queryId
  */
 static void
-pgss_post_parse_analyze(ParseState *pstate, Query *query)
+pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
 {
-	pgssJumbleState jstate;
-
 	if (prev_post_parse_analyze_hook)
-		prev_post_parse_analyze_hook(pstate, query);
-
-	/* Assert we didn't do this already */
-	Assert(query->queryId == UINT64CONST(0));
+		prev_post_parse_analyze_hook(pstate, query, jstate);
 
 	/* Safety check... */
 	if (!pgss || !pgss_hash || !pgss_enabled(exec_nested_level))
@@ -879,35 +817,14 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query)
 		return;
 	}
 
-	/* Set up workspace for query jumbling */
-	jstate.jumble = (unsigned char *) palloc(JUMBLE_SIZE);
-	jstate.jumble_len = 0;
-	jstate.clocations_buf_size = 32;
-	jstate.clocations = (pgssLocationLen *)
-		palloc(jstate.clocations_buf_size * sizeof(pgssLocationLen));
-	jstate.clocations_count = 0;
-	jstate.highest_extern_param_id = 0;
-
-	/* Compute query ID and mark the Query node with it */
-	JumbleQuery(&jstate, query);
-	query->queryId =
-		DatumGetUInt64(hash_any_extended(jstate.jumble, jstate.jumble_len, 0));
-
 	/*
-	 * If we are unlucky enough to get a hash of zero, use 1 instead, to
-	 * prevent confusion with the utility-statement case.
+	 * If query jumbling were able to identify any ignorable constants, we
+	 * immediately create a hash table entry for the query, so that we can
+	 * record the normalized form of the query string.  If there were no such
+	 * constants, the normalized string would be the same as the query text
+	 * anyway, so there's no need for an early entry.
 	 */
-	if (query->queryId == UINT64CONST(0))
-		query->queryId = UINT64CONST(1);
-
-	/*
-	 * If we were able to identify any ignorable constants, we immediately
-	 * create a hash table entry for the query, so that we can record the
-	 * normalized form of the query string.  If there were no such constants,
-	 * the normalized string would be the same as the query text anyway, so
-	 * there's no need for an early entry.
-	 */
-	if (jstate.clocations_count > 0)
+	if (jstate && jstate->clocations_count > 0)
 		pgss_store(pstate->p_sourcetext,
 				   query->queryId,
 				   query->stmt_location,
@@ -917,7 +834,7 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query)
 				   0,
 				   NULL,
 				   NULL,
-				   &jstate);
+				   jstate);
 }
 
 /*
@@ -1267,7 +1184,7 @@ pgss_store(const char *query, uint64 queryId,
 		   double total_time, uint64 rows,
 		   const BufferUsage *bufusage,
 		   const WalUsage *walusage,
-		   pgssJumbleState *jstate)
+		   JumbleState *jstate)
 {
 	pgssHashKey key;
 	pgssEntry  *entry;
@@ -2627,678 +2544,6 @@ release_lock:
 	LWLockRelease(pgss->lock);
 }
 
-/*
- * AppendJumble: Append a value that is substantive in a given query to
- * the current jumble.
- */
-static void
-AppendJumble(pgssJumbleState *jstate, const unsigned char *item, Size size)
-{
-	unsigned char *jumble = jstate->jumble;
-	Size		jumble_len = jstate->jumble_len;
-
-	/*
-	 * Whenever the jumble buffer is full, we hash the current contents and
-	 * reset the buffer to contain just that hash value, thus relying on the
-	 * hash to summarize everything so far.
-	 */
-	while (size > 0)
-	{
-		Size		part_size;
-
-		if (jumble_len >= JUMBLE_SIZE)
-		{
-			uint64		start_hash;
-
-			start_hash = DatumGetUInt64(hash_any_extended(jumble,
-														  JUMBLE_SIZE, 0));
-			memcpy(jumble, &start_hash, sizeof(start_hash));
-			jumble_len = sizeof(start_hash);
-		}
-		part_size = Min(size, JUMBLE_SIZE - jumble_len);
-		memcpy(jumble + jumble_len, item, part_size);
-		jumble_len += part_size;
-		item += part_size;
-		size -= part_size;
-	}
-	jstate->jumble_len = jumble_len;
-}
-
-/*
- * Wrappers around AppendJumble to encapsulate details of serialization
- * of individual local variable elements.
- */
-#define APP_JUMB(item) \
-	AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
-#define APP_JUMB_STRING(str) \
-	AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1)
-
-/*
- * JumbleQuery: Selectively serialize the query tree, appending significant
- * data to the "query jumble" while ignoring nonsignificant data.
- *
- * Rule of thumb for what to include is that we should ignore anything not
- * semantically significant (such as alias names) as well as anything that can
- * be deduced from child nodes (else we'd just be double-hashing that piece
- * of information).
- */
-static void
-JumbleQuery(pgssJumbleState *jstate, Query *query)
-{
-	Assert(IsA(query, Query));
-	Assert(query->utilityStmt == NULL);
-
-	APP_JUMB(query->commandType);
-	/* resultRelation is usually predictable from commandType */
-	JumbleExpr(jstate, (Node *) query->cteList);
-	JumbleRangeTable(jstate, query->rtable);
-	JumbleExpr(jstate, (Node *) query->jointree);
-	JumbleExpr(jstate, (Node *) query->targetList);
-	JumbleExpr(jstate, (Node *) query->onConflict);
-	JumbleExpr(jstate, (Node *) query->returningList);
-	JumbleExpr(jstate, (Node *) query->groupClause);
-	JumbleExpr(jstate, (Node *) query->groupingSets);
-	JumbleExpr(jstate, query->havingQual);
-	JumbleExpr(jstate, (Node *) query->windowClause);
-	JumbleExpr(jstate, (Node *) query->distinctClause);
-	JumbleExpr(jstate, (Node *) query->sortClause);
-	JumbleExpr(jstate, query->limitOffset);
-	JumbleExpr(jstate, query->limitCount);
-	JumbleRowMarks(jstate, query->rowMarks);
-	JumbleExpr(jstate, query->setOperations);
-}
-
-/*
- * Jumble a range table
- */
-static void
-JumbleRangeTable(pgssJumbleState *jstate, List *rtable)
-{
-	ListCell   *lc;
-
-	foreach(lc, rtable)
-	{
-		RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
-
-		APP_JUMB(rte->rtekind);
-		switch (rte->rtekind)
-		{
-			case RTE_RELATION:
-				APP_JUMB(rte->relid);
-				JumbleExpr(jstate, (Node *) rte->tablesample);
-				break;
-			case RTE_SUBQUERY:
-				JumbleQuery(jstate, rte->subquery);
-				break;
-			case RTE_JOIN:
-				APP_JUMB(rte->jointype);
-				break;
-			case RTE_FUNCTION:
-				JumbleExpr(jstate, (Node *) rte->functions);
-				break;
-			case RTE_TABLEFUNC:
-				JumbleExpr(jstate, (Node *) rte->tablefunc);
-				break;
-			case RTE_VALUES:
-				JumbleExpr(jstate, (Node *) rte->values_lists);
-				break;
-			case RTE_CTE:
-
-				/*
-				 * Depending on the CTE name here isn't ideal, but it's the
-				 * only info we have to identify the referenced WITH item.
-				 */
-				APP_JUMB_STRING(rte->ctename);
-				APP_JUMB(rte->ctelevelsup);
-				break;
-			case RTE_NAMEDTUPLESTORE:
-				APP_JUMB_STRING(rte->enrname);
-				break;
-			case RTE_RESULT:
-				break;
-			default:
-				elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
-				break;
-		}
-	}
-}
-
-/*
- * Jumble a rowMarks list
- */
-static void
-JumbleRowMarks(pgssJumbleState *jstate, List *rowMarks)
-{
-	ListCell   *lc;
-
-	foreach(lc, rowMarks)
-	{
-		RowMarkClause *rowmark = lfirst_node(RowMarkClause, lc);
-
-		if (!rowmark->pushedDown)
-		{
-			APP_JUMB(rowmark->rti);
-			APP_JUMB(rowmark->strength);
-			APP_JUMB(rowmark->waitPolicy);
-		}
-	}
-}
-
-/*
- * Jumble an expression tree
- *
- * In general this function should handle all the same node types that
- * expression_tree_walker() does, and therefore it's coded to be as parallel
- * to that function as possible.  However, since we are only invoked on
- * queries immediately post-parse-analysis, we need not handle node types
- * that only appear in planning.
- *
- * Note: the reason we don't simply use expression_tree_walker() is that the
- * point of that function is to support tree walkers that don't care about
- * most tree node types, but here we care about all types.  We should complain
- * about any unrecognized node type.
- */
-static void
-JumbleExpr(pgssJumbleState *jstate, Node *node)
-{
-	ListCell   *temp;
-
-	if (node == NULL)
-		return;
-
-	/* Guard against stack overflow due to overly complex expressions */
-	check_stack_depth();
-
-	/*
-	 * We always emit the node's NodeTag, then any additional fields that are
-	 * considered significant, and then we recurse to any child nodes.
-	 */
-	APP_JUMB(node->type);
-
-	switch (nodeTag(node))
-	{
-		case T_Var:
-			{
-				Var		   *var = (Var *) node;
-
-				APP_JUMB(var->varno);
-				APP_JUMB(var->varattno);
-				APP_JUMB(var->varlevelsup);
-			}
-			break;
-		case T_Const:
-			{
-				Const	   *c = (Const *) node;
-
-				/* We jumble only the constant's type, not its value */
-				APP_JUMB(c->consttype);
-				/* Also, record its parse location for query normalization */
-				RecordConstLocation(jstate, c->location);
-			}
-			break;
-		case T_Param:
-			{
-				Param	   *p = (Param *) node;
-
-				APP_JUMB(p->paramkind);
-				APP_JUMB(p->paramid);
-				APP_JUMB(p->paramtype);
-				/* Also, track the highest external Param id */
-				if (p->paramkind == PARAM_EXTERN &&
-					p->paramid > jstate->highest_extern_param_id)
-					jstate->highest_extern_param_id = p->paramid;
-			}
-			break;
-		case T_Aggref:
-			{
-				Aggref	   *expr = (Aggref *) node;
-
-				APP_JUMB(expr->aggfnoid);
-				JumbleExpr(jstate, (Node *) expr->aggdirectargs);
-				JumbleExpr(jstate, (Node *) expr->args);
-				JumbleExpr(jstate, (Node *) expr->aggorder);
-				JumbleExpr(jstate, (Node *) expr->aggdistinct);
-				JumbleExpr(jstate, (Node *) expr->aggfilter);
-			}
-			break;
-		case T_GroupingFunc:
-			{
-				GroupingFunc *grpnode = (GroupingFunc *) node;
-
-				JumbleExpr(jstate, (Node *) grpnode->refs);
-			}
-			break;
-		case T_WindowFunc:
-			{
-				WindowFunc *expr = (WindowFunc *) node;
-
-				APP_JUMB(expr->winfnoid);
-				APP_JUMB(expr->winref);
-				JumbleExpr(jstate, (Node *) expr->args);
-				JumbleExpr(jstate, (Node *) expr->aggfilter);
-			}
-			break;
-		case T_SubscriptingRef:
-			{
-				SubscriptingRef *sbsref = (SubscriptingRef *) node;
-
-				JumbleExpr(jstate, (Node *) sbsref->refupperindexpr);
-				JumbleExpr(jstate, (Node *) sbsref->reflowerindexpr);
-				JumbleExpr(jstate, (Node *) sbsref->refexpr);
-				JumbleExpr(jstate, (Node *) sbsref->refassgnexpr);
-			}
-			break;
-		case T_FuncExpr:
-			{
-				FuncExpr   *expr = (FuncExpr *) node;
-
-				APP_JUMB(expr->funcid);
-				JumbleExpr(jstate, (Node *) expr->args);
-			}
-			break;
-		case T_NamedArgExpr:
-			{
-				NamedArgExpr *nae = (NamedArgExpr *) node;
-
-				APP_JUMB(nae->argnumber);
-				JumbleExpr(jstate, (Node *) nae->arg);
-			}
-			break;
-		case T_OpExpr:
-		case T_DistinctExpr:	/* struct-equivalent to OpExpr */
-		case T_NullIfExpr:		/* struct-equivalent to OpExpr */
-			{
-				OpExpr	   *expr = (OpExpr *) node;
-
-				APP_JUMB(expr->opno);
-				JumbleExpr(jstate, (Node *) expr->args);
-			}
-			break;
-		case T_ScalarArrayOpExpr:
-			{
-				ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
-
-				APP_JUMB(expr->opno);
-				APP_JUMB(expr->useOr);
-				JumbleExpr(jstate, (Node *) expr->args);
-			}
-			break;
-		case T_BoolExpr:
-			{
-				BoolExpr   *expr = (BoolExpr *) node;
-
-				APP_JUMB(expr->boolop);
-				JumbleExpr(jstate, (Node *) expr->args);
-			}
-			break;
-		case T_SubLink:
-			{
-				SubLink    *sublink = (SubLink *) node;
-
-				APP_JUMB(sublink->subLinkType);
-				APP_JUMB(sublink->subLinkId);
-				JumbleExpr(jstate, (Node *) sublink->testexpr);
-				JumbleQuery(jstate, castNode(Query, sublink->subselect));
-			}
-			break;
-		case T_FieldSelect:
-			{
-				FieldSelect *fs = (FieldSelect *) node;
-
-				APP_JUMB(fs->fieldnum);
-				JumbleExpr(jstate, (Node *) fs->arg);
-			}
-			break;
-		case T_FieldStore:
-			{
-				FieldStore *fstore = (FieldStore *) node;
-
-				JumbleExpr(jstate, (Node *) fstore->arg);
-				JumbleExpr(jstate, (Node *) fstore->newvals);
-			}
-			break;
-		case T_RelabelType:
-			{
-				RelabelType *rt = (RelabelType *) node;
-
-				APP_JUMB(rt->resulttype);
-				JumbleExpr(jstate, (Node *) rt->arg);
-			}
-			break;
-		case T_CoerceViaIO:
-			{
-				CoerceViaIO *cio = (CoerceViaIO *) node;
-
-				APP_JUMB(cio->resulttype);
-				JumbleExpr(jstate, (Node *) cio->arg);
-			}
-			break;
-		case T_ArrayCoerceExpr:
-			{
-				ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
-
-				APP_JUMB(acexpr->resulttype);
-				JumbleExpr(jstate, (Node *) acexpr->arg);
-				JumbleExpr(jstate, (Node *) acexpr->elemexpr);
-			}
-			break;
-		case T_ConvertRowtypeExpr:
-			{
-				ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
-
-				APP_JUMB(crexpr->resulttype);
-				JumbleExpr(jstate, (Node *) crexpr->arg);
-			}
-			break;
-		case T_CollateExpr:
-			{
-				CollateExpr *ce = (CollateExpr *) node;
-
-				APP_JUMB(ce->collOid);
-				JumbleExpr(jstate, (Node *) ce->arg);
-			}
-			break;
-		case T_CaseExpr:
-			{
-				CaseExpr   *caseexpr = (CaseExpr *) node;
-
-				JumbleExpr(jstate, (Node *) caseexpr->arg);
-				foreach(temp, caseexpr->args)
-				{
-					CaseWhen   *when = lfirst_node(CaseWhen, temp);
-
-					JumbleExpr(jstate, (Node *) when->expr);
-					JumbleExpr(jstate, (Node *) when->result);
-				}
-				JumbleExpr(jstate, (Node *) caseexpr->defresult);
-			}
-			break;
-		case T_CaseTestExpr:
-			{
-				CaseTestExpr *ct = (CaseTestExpr *) node;
-
-				APP_JUMB(ct->typeId);
-			}
-			break;
-		case T_ArrayExpr:
-			JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
-			break;
-		case T_RowExpr:
-			JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
-			break;
-		case T_RowCompareExpr:
-			{
-				RowCompareExpr *rcexpr = (RowCompareExpr *) node;
-
-				APP_JUMB(rcexpr->rctype);
-				JumbleExpr(jstate, (Node *) rcexpr->largs);
-				JumbleExpr(jstate, (Node *) rcexpr->rargs);
-			}
-			break;
-		case T_CoalesceExpr:
-			JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
-			break;
-		case T_MinMaxExpr:
-			{
-				MinMaxExpr *mmexpr = (MinMaxExpr *) node;
-
-				APP_JUMB(mmexpr->op);
-				JumbleExpr(jstate, (Node *) mmexpr->args);
-			}
-			break;
-		case T_SQLValueFunction:
-			{
-				SQLValueFunction *svf = (SQLValueFunction *) node;
-
-				APP_JUMB(svf->op);
-				/* type is fully determined by op */
-				APP_JUMB(svf->typmod);
-			}
-			break;
-		case T_XmlExpr:
-			{
-				XmlExpr    *xexpr = (XmlExpr *) node;
-
-				APP_JUMB(xexpr->op);
-				JumbleExpr(jstate, (Node *) xexpr->named_args);
-				JumbleExpr(jstate, (Node *) xexpr->args);
-			}
-			break;
-		case T_NullTest:
-			{
-				NullTest   *nt = (NullTest *) node;
-
-				APP_JUMB(nt->nulltesttype);
-				JumbleExpr(jstate, (Node *) nt->arg);
-			}
-			break;
-		case T_BooleanTest:
-			{
-				BooleanTest *bt = (BooleanTest *) node;
-
-				APP_JUMB(bt->booltesttype);
-				JumbleExpr(jstate, (Node *) bt->arg);
-			}
-			break;
-		case T_CoerceToDomain:
-			{
-				CoerceToDomain *cd = (CoerceToDomain *) node;
-
-				APP_JUMB(cd->resulttype);
-				JumbleExpr(jstate, (Node *) cd->arg);
-			}
-			break;
-		case T_CoerceToDomainValue:
-			{
-				CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
-
-				APP_JUMB(cdv->typeId);
-			}
-			break;
-		case T_SetToDefault:
-			{
-				SetToDefault *sd = (SetToDefault *) node;
-
-				APP_JUMB(sd->typeId);
-			}
-			break;
-		case T_CurrentOfExpr:
-			{
-				CurrentOfExpr *ce = (CurrentOfExpr *) node;
-
-				APP_JUMB(ce->cvarno);
-				if (ce->cursor_name)
-					APP_JUMB_STRING(ce->cursor_name);
-				APP_JUMB(ce->cursor_param);
-			}
-			break;
-		case T_NextValueExpr:
-			{
-				NextValueExpr *nve = (NextValueExpr *) node;
-
-				APP_JUMB(nve->seqid);
-				APP_JUMB(nve->typeId);
-			}
-			break;
-		case T_InferenceElem:
-			{
-				InferenceElem *ie = (InferenceElem *) node;
-
-				APP_JUMB(ie->infercollid);
-				APP_JUMB(ie->inferopclass);
-				JumbleExpr(jstate, ie->expr);
-			}
-			break;
-		case T_TargetEntry:
-			{
-				TargetEntry *tle = (TargetEntry *) node;
-
-				APP_JUMB(tle->resno);
-				APP_JUMB(tle->ressortgroupref);
-				JumbleExpr(jstate, (Node *) tle->expr);
-			}
-			break;
-		case T_RangeTblRef:
-			{
-				RangeTblRef *rtr = (RangeTblRef *) node;
-
-				APP_JUMB(rtr->rtindex);
-			}
-			break;
-		case T_JoinExpr:
-			{
-				JoinExpr   *join = (JoinExpr *) node;
-
-				APP_JUMB(join->jointype);
-				APP_JUMB(join->isNatural);
-				APP_JUMB(join->rtindex);
-				JumbleExpr(jstate, join->larg);
-				JumbleExpr(jstate, join->rarg);
-				JumbleExpr(jstate, join->quals);
-			}
-			break;
-		case T_FromExpr:
-			{
-				FromExpr   *from = (FromExpr *) node;
-
-				JumbleExpr(jstate, (Node *) from->fromlist);
-				JumbleExpr(jstate, from->quals);
-			}
-			break;
-		case T_OnConflictExpr:
-			{
-				OnConflictExpr *conf = (OnConflictExpr *) node;
-
-				APP_JUMB(conf->action);
-				JumbleExpr(jstate, (Node *) conf->arbiterElems);
-				JumbleExpr(jstate, conf->arbiterWhere);
-				JumbleExpr(jstate, (Node *) conf->onConflictSet);
-				JumbleExpr(jstate, conf->onConflictWhere);
-				APP_JUMB(conf->constraint);
-				APP_JUMB(conf->exclRelIndex);
-				JumbleExpr(jstate, (Node *) conf->exclRelTlist);
-			}
-			break;
-		case T_List:
-			foreach(temp, (List *) node)
-			{
-				JumbleExpr(jstate, (Node *) lfirst(temp));
-			}
-			break;
-		case T_IntList:
-			foreach(temp, (List *) node)
-			{
-				APP_JUMB(lfirst_int(temp));
-			}
-			break;
-		case T_SortGroupClause:
-			{
-				SortGroupClause *sgc = (SortGroupClause *) node;
-
-				APP_JUMB(sgc->tleSortGroupRef);
-				APP_JUMB(sgc->eqop);
-				APP_JUMB(sgc->sortop);
-				APP_JUMB(sgc->nulls_first);
-			}
-			break;
-		case T_GroupingSet:
-			{
-				GroupingSet *gsnode = (GroupingSet *) node;
-
-				JumbleExpr(jstate, (Node *) gsnode->content);
-			}
-			break;
-		case T_WindowClause:
-			{
-				WindowClause *wc = (WindowClause *) node;
-
-				APP_JUMB(wc->winref);
-				APP_JUMB(wc->frameOptions);
-				JumbleExpr(jstate, (Node *) wc->partitionClause);
-				JumbleExpr(jstate, (Node *) wc->orderClause);
-				JumbleExpr(jstate, wc->startOffset);
-				JumbleExpr(jstate, wc->endOffset);
-			}
-			break;
-		case T_CommonTableExpr:
-			{
-				CommonTableExpr *cte = (CommonTableExpr *) node;
-
-				/* we store the string name because RTE_CTE RTEs need it */
-				APP_JUMB_STRING(cte->ctename);
-				APP_JUMB(cte->ctematerialized);
-				JumbleQuery(jstate, castNode(Query, cte->ctequery));
-			}
-			break;
-		case T_SetOperationStmt:
-			{
-				SetOperationStmt *setop = (SetOperationStmt *) node;
-
-				APP_JUMB(setop->op);
-				APP_JUMB(setop->all);
-				JumbleExpr(jstate, setop->larg);
-				JumbleExpr(jstate, setop->rarg);
-			}
-			break;
-		case T_RangeTblFunction:
-			{
-				RangeTblFunction *rtfunc = (RangeTblFunction *) node;
-
-				JumbleExpr(jstate, rtfunc->funcexpr);
-			}
-			break;
-		case T_TableFunc:
-			{
-				TableFunc  *tablefunc = (TableFunc *) node;
-
-				JumbleExpr(jstate, tablefunc->docexpr);
-				JumbleExpr(jstate, tablefunc->rowexpr);
-				JumbleExpr(jstate, (Node *) tablefunc->colexprs);
-			}
-			break;
-		case T_TableSampleClause:
-			{
-				TableSampleClause *tsc = (TableSampleClause *) node;
-
-				APP_JUMB(tsc->tsmhandler);
-				JumbleExpr(jstate, (Node *) tsc->args);
-				JumbleExpr(jstate, (Node *) tsc->repeatable);
-			}
-			break;
-		default:
-			/* Only a warning, since we can stumble along anyway */
-			elog(WARNING, "unrecognized node type: %d",
-				 (int) nodeTag(node));
-			break;
-	}
-}
-
-/*
- * Record location of constant within query string of query tree
- * that is currently being walked.
- */
-static void
-RecordConstLocation(pgssJumbleState *jstate, int location)
-{
-	/* -1 indicates unknown or undefined location */
-	if (location >= 0)
-	{
-		/* enlarge array if needed */
-		if (jstate->clocations_count >= jstate->clocations_buf_size)
-		{
-			jstate->clocations_buf_size *= 2;
-			jstate->clocations = (pgssLocationLen *)
-				repalloc(jstate->clocations,
-						 jstate->clocations_buf_size *
-						 sizeof(pgssLocationLen));
-		}
-		jstate->clocations[jstate->clocations_count].location = location;
-		/* initialize lengths to -1 to simplify fill_in_constant_lengths */
-		jstate->clocations[jstate->clocations_count].length = -1;
-		jstate->clocations_count++;
-	}
-}
-
 /*
  * Generate a normalized version of the query string that will be used to
  * represent all similar queries.
@@ -3319,7 +2564,7 @@ RecordConstLocation(pgssJumbleState *jstate, int location)
  * Returns a palloc'd string.
  */
 static char *
-generate_normalized_query(pgssJumbleState *jstate, const char *query,
+generate_normalized_query(JumbleState *jstate, const char *query,
 						  int query_loc, int *query_len_p)
 {
 	char	   *norm_query;
@@ -3426,10 +2671,10 @@ generate_normalized_query(pgssJumbleState *jstate, const char *query,
  * reason for a constant to start with a '-'.
  */
 static void
-fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
+fill_in_constant_lengths(JumbleState *jstate, const char *query,
 						 int query_loc)
 {
-	pgssLocationLen *locs;
+	LocationLen *locs;
 	core_yyscan_t yyscanner;
 	core_yy_extra_type yyextra;
 	core_YYSTYPE yylval;
@@ -3443,7 +2688,7 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
 	 */
 	if (jstate->clocations_count > 1)
 		qsort(jstate->clocations, jstate->clocations_count,
-			  sizeof(pgssLocationLen), comp_location);
+			  sizeof(LocationLen), comp_location);
 	locs = jstate->clocations;
 
 	/* initialize the flex scanner --- should match raw_parser() */
@@ -3523,13 +2768,13 @@ fill_in_constant_lengths(pgssJumbleState *jstate, const char *query,
 }
 
 /*
- * comp_location: comparator for qsorting pgssLocationLen structs by location
+ * comp_location: comparator for qsorting LocationLen structs by location
  */
 static int
 comp_location(const void *a, const void *b)
 {
-	int			l = ((const pgssLocationLen *) a)->location;
-	int			r = ((const pgssLocationLen *) b)->location;
+	int			l = ((const LocationLen *) a)->location;
+	int			r = ((const LocationLen *) b)->location;
 
 	if (l < r)
 		return -1;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.conf b/contrib/pg_stat_statements/pg_stat_statements.conf
index 13346e2807..e47b26040f 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.conf
+++ b/contrib/pg_stat_statements/pg_stat_statements.conf
@@ -1 +1,2 @@
 shared_preload_libraries = 'pg_stat_statements'
+compute_query_id = on
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 1f0e0fc1fb..d1aa746224 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -7538,6 +7538,31 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
      <title>Statistics Monitoring</title>
      <variablelist>
 
+     <varlistentry id="guc-compute-query-id" xreflabel="compute_query_id">
+      <term><varname>compute_query_id</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>compute_query_id</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Enables in-core computation of a query identifier.  The <xref
+        linkend="pgstatstatements"/> extension requires a query identifier
+        to be computed.  Note that an external module can alternatively
+        be used if the in-core query identifier computation method
+        isn't acceptable.  In this case, in-core computation should
+        remain disabled.  The default is <literal>off</literal>.
+       </para>
+       <note>
+        <para>
+         To ensure that a only one query identifier is calculated and
+         displayed, extensions that calculate query identifiers should
+         throw an error if a query identifier has already been computed.
+        </para>
+       </note>
+      </listitem>
+     </varlistentry>
+
      <varlistentry>
       <term><varname>log_statement_stats</varname> (<type>boolean</type>)
       <indexterm>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index 464bf0e5ae..3ca292d71f 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -20,6 +20,14 @@
   This means that a server restart is needed to add or remove the module.
  </para>
 
+ <para>
+  The module will not track statistics unless query
+  identifiers are calculated.  This can be done by enabling <xref
+  linkend="guc-compute-query-id"/> or using a third-party module that
+  computes its own query identifiers.  Note that all statistics tracked
+  by this module must be reset if the query identifier method is changed.
+ </para>
+
  <para>
    When <filename>pg_stat_statements</filename> is loaded, it tracks
    statistics across all databases of the server.  To access and manipulate
@@ -84,7 +92,7 @@
        <structfield>queryid</structfield> <type>bigint</type>
       </para>
       <para>
-       Internal hash code, computed from the statement's parse tree
+       Hash code to identify identical normalized queries.
       </para></entry>
      </row>
 
@@ -386,6 +394,16 @@
    are compared strictly on the basis of their textual query strings, however.
   </para>
 
+  <note>
+   <para>
+    The following details about constant replacement and
+    <structfield>queryid</structfield> only applies when <xref
+    linkend="guc-compute-query-id"/> is enabled.  If you use an external
+    module instead to compute <structfield>queryid</structfield>, you
+    should refer to its documentation for details.
+   </para>
+  </note>
+
   <para>
    When a constant's value has been ignored for purposes of matching the query
    to other queries, the constant is replaced by a parameter symbol, such
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index 7149724953..c565c80365 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -46,6 +46,8 @@
 #include "parser/parsetree.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/builtins.h"
+#include "utils/guc.h"
+#include "utils/queryjumble.h"
 #include "utils/rel.h"
 
 
@@ -107,6 +109,7 @@ parse_analyze(RawStmt *parseTree, const char *sourceText,
 {
 	ParseState *pstate = make_parsestate(NULL);
 	Query	   *query;
+	JumbleState *jstate = NULL;
 
 	Assert(sourceText != NULL); /* required as of 8.4 */
 
@@ -119,8 +122,11 @@ parse_analyze(RawStmt *parseTree, const char *sourceText,
 
 	query = transformTopLevelStmt(pstate, parseTree);
 
+	if (compute_query_id)
+		jstate = JumbleQuery(query, sourceText);
+
 	if (post_parse_analyze_hook)
-		(*post_parse_analyze_hook) (pstate, query);
+		(*post_parse_analyze_hook) (pstate, query, jstate);
 
 	free_parsestate(pstate);
 
@@ -140,6 +146,7 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText,
 {
 	ParseState *pstate = make_parsestate(NULL);
 	Query	   *query;
+	JumbleState *jstate = NULL;
 
 	Assert(sourceText != NULL); /* required as of 8.4 */
 
@@ -152,8 +159,11 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText,
 	/* make sure all is well with parameter types */
 	check_variable_parameters(pstate, query);
 
+	if (compute_query_id)
+		jstate = JumbleQuery(query, sourceText);
+
 	if (post_parse_analyze_hook)
-		(*post_parse_analyze_hook) (pstate, query);
+		(*post_parse_analyze_hook) (pstate, query, jstate);
 
 	free_parsestate(pstate);
 
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 2b1b68109f..7e034b72b1 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -665,6 +665,7 @@ pg_analyze_and_rewrite_params(RawStmt *parsetree,
 	ParseState *pstate;
 	Query	   *query;
 	List	   *querytree_list;
+	JumbleState *jstate = NULL;
 
 	Assert(query_string != NULL);	/* required as of 8.4 */
 
@@ -683,8 +684,11 @@ pg_analyze_and_rewrite_params(RawStmt *parsetree,
 
 	query = transformTopLevelStmt(pstate, parsetree);
 
+	if (compute_query_id)
+		jstate = JumbleQuery(query, query_string);
+
 	if (post_parse_analyze_hook)
-		(*post_parse_analyze_hook) (pstate, query);
+		(*post_parse_analyze_hook) (pstate, query, jstate);
 
 	free_parsestate(pstate);
 
diff --git a/src/backend/utils/misc/Makefile b/src/backend/utils/misc/Makefile
index 2397fc2453..1d5327cf64 100644
--- a/src/backend/utils/misc/Makefile
+++ b/src/backend/utils/misc/Makefile
@@ -22,6 +22,7 @@ OBJS = \
 	pg_rusage.o \
 	ps_status.o \
 	queryenvironment.o \
+	queryjumble.o \
 	rls.o \
 	sampling.o \
 	superuser.o \
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 8bfaa53541..87287ac13e 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -521,6 +521,7 @@ extern const struct config_enum_entry dynamic_shared_memory_options[];
 /*
  * GUC option variables that are exported from this module
  */
+bool		compute_query_id = false;
 bool		log_duration = false;
 bool		Debug_print_plan = false;
 bool		Debug_print_parse = false;
@@ -1425,6 +1426,15 @@ static struct config_bool ConfigureNamesBool[] =
 		true,
 		NULL, NULL, NULL
 	},
+	{
+		{"compute_query_id", PGC_SUSET, STATS_MONITORING,
+			gettext_noop("Compute query identifiers."),
+			NULL
+		},
+		&compute_query_id,
+		false,
+		NULL, NULL, NULL
+	},
 	{
 		{"log_parser_stats", PGC_SUSET, STATS_MONITORING,
 			gettext_noop("Writes parser performance statistics to the server log."),
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 74b416b74a..aa34c99f0c 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -594,6 +594,7 @@
 
 # - Monitoring -
 
+#compute_query_id = off
 #log_parser_stats = off
 #log_planner_stats = off
 #log_executor_stats = off
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
new file mode 100644
index 0000000000..2a47688fd6
--- /dev/null
+++ b/src/backend/utils/misc/queryjumble.c
@@ -0,0 +1,834 @@
+/*-------------------------------------------------------------------------
+ *
+ * queryjumble.c
+ *	 Query normalization and fingerprinting.
+ *
+ * Normalization is a process whereby similar queries, typically differing only
+ * in their constants (though the exact rules are somewhat more subtle than
+ * that) are recognized as equivalent, and are tracked as a single entry.  This
+ * is particularly useful for non-prepared queries.
+ *
+ * Normalization is implemented by fingerprinting queries, selectively
+ * serializing those fields of each query tree's nodes that are judged to be
+ * essential to the query.  This is referred to as a query jumble.  This is
+ * distinct from a regular serialization in that various extraneous
+ * information is ignored as irrelevant or not essential to the query, such
+ * as the collations of Vars and, most notably, the values of constants.
+ *
+ * This jumble is acquired at the end of parse analysis of each query, and
+ * a 64-bit hash of it is stored into the query's Query.queryId field.
+ * The server then copies this value around, making it available in plan
+ * tree(s) generated from the query.  The executor can then use this value
+ * to blame query costs on the proper queryId.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ *	  src/backend/utils/misc/queryjumble.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "common/hashfn.h"
+#include "miscadmin.h"
+#include "parser/scansup.h"
+#include "utils/queryjumble.h"
+
+#define JUMBLE_SIZE				1024	/* query serialization buffer size */
+
+static uint64 compute_utility_queryid(const char *str, int query_len);
+static void AppendJumble(JumbleState *jstate,
+						 const unsigned char *item, Size size);
+static void JumbleQueryInternal(JumbleState *jstate, Query *query);
+static void JumbleRangeTable(JumbleState *jstate, List *rtable);
+static void JumbleRowMarks(JumbleState *jstate, List *rowMarks);
+static void JumbleExpr(JumbleState *jstate, Node *node);
+static void RecordConstLocation(JumbleState *jstate, int location);
+
+/*
+ * Given a possibly multi-statement source string, confine our attention to the
+ * relevant part of the string.
+ */
+const char *
+CleanQuerytext(const char *query, int *location, int *len)
+{
+	int query_location = *location;
+	int query_len = *len;
+
+	/* First apply starting offset, unless it's -1 (unknown). */
+	if (query_location >= 0)
+	{
+		Assert(query_location <= strlen(query));
+		query += query_location;
+		/* Length of 0 (or -1) means "rest of string" */
+		if (query_len <= 0)
+			query_len = strlen(query);
+		else
+			Assert(query_len <= strlen(query));
+	}
+	else
+	{
+		/* If query location is unknown, distrust query_len as well */
+		query_location = 0;
+		query_len = strlen(query);
+	}
+
+	/*
+	 * Discard leading and trailing whitespace, too.  Use scanner_isspace()
+	 * not libc's isspace(), because we want to match the lexer's behavior.
+	 */
+	while (query_len > 0 && scanner_isspace(query[0]))
+		query++, query_location++, query_len--;
+	while (query_len > 0 && scanner_isspace(query[query_len - 1]))
+		query_len--;
+
+	*location = query_location;
+	*len = query_len;
+
+	return query;
+}
+
+JumbleState *
+JumbleQuery(Query *query, const char *querytext)
+{
+	JumbleState *jstate = NULL;
+	if (query->utilityStmt)
+	{
+		const char *sql;
+		int query_location = query->stmt_location;
+		int query_len = query->stmt_len;
+
+		/*
+		 * Confine our attention to the relevant part of the string, if the
+		 * query is a portion of a multi-statement source string.
+		 */
+		sql = CleanQuerytext(querytext, &query_location, &query_len);
+
+		query->queryId = compute_utility_queryid(sql, query_len);
+	}
+	else
+	{
+		jstate = (JumbleState *) palloc(sizeof(JumbleState));
+
+		/* Set up workspace for query jumbling */
+		jstate->jumble = (unsigned char *) palloc(JUMBLE_SIZE);
+		jstate->jumble_len = 0;
+		jstate->clocations_buf_size = 32;
+		jstate->clocations = (LocationLen *)
+			palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+		jstate->clocations_count = 0;
+		jstate->highest_extern_param_id = 0;
+
+		/* Compute query ID and mark the Query node with it */
+		JumbleQueryInternal(jstate, query);
+		query->queryId = DatumGetUInt64(hash_any_extended(jstate->jumble,
+														  jstate->jumble_len,
+														  0));
+
+		/*
+		 * If we are unlucky enough to get a hash of zero, use 1 instead, to
+		 * prevent confusion with the utility-statement case.
+		 */
+		if (query->queryId == UINT64CONST(0))
+			query->queryId = UINT64CONST(1);
+	}
+
+	return jstate;
+}
+
+/*
+ * Compute a query identifier for the given utility query string.
+ */
+static uint64
+compute_utility_queryid(const char *str, int query_len)
+{
+	uint64 queryId;
+
+	queryId = DatumGetUInt64(hash_any_extended((const unsigned char *) str,
+											   query_len, 0));
+
+	/*
+	 * If we are unlucky enough to get a hash of zero(invalid), use
+	 * queryID as 2 instead, queryID 1 is already in use for normal
+	 * statements.
+	 */
+	if (queryId == UINT64CONST(0))
+		queryId = UINT64CONST(2);
+
+	return queryId;
+}
+
+/*
+ * AppendJumble: Append a value that is substantive in a given query to
+ * the current jumble.
+ */
+static void
+AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
+{
+	unsigned char *jumble = jstate->jumble;
+	Size		jumble_len = jstate->jumble_len;
+
+	/*
+	 * Whenever the jumble buffer is full, we hash the current contents and
+	 * reset the buffer to contain just that hash value, thus relying on the
+	 * hash to summarize everything so far.
+	 */
+	while (size > 0)
+	{
+		Size		part_size;
+
+		if (jumble_len >= JUMBLE_SIZE)
+		{
+			uint64		start_hash;
+
+			start_hash = DatumGetUInt64(hash_any_extended(jumble,
+														  JUMBLE_SIZE, 0));
+			memcpy(jumble, &start_hash, sizeof(start_hash));
+			jumble_len = sizeof(start_hash);
+		}
+		part_size = Min(size, JUMBLE_SIZE - jumble_len);
+		memcpy(jumble + jumble_len, item, part_size);
+		jumble_len += part_size;
+		item += part_size;
+		size -= part_size;
+	}
+	jstate->jumble_len = jumble_len;
+}
+
+/*
+ * Wrappers around AppendJumble to encapsulate details of serialization
+ * of individual local variable elements.
+ */
+#define APP_JUMB(item) \
+	AppendJumble(jstate, (const unsigned char *) &(item), sizeof(item))
+#define APP_JUMB_STRING(str) \
+	AppendJumble(jstate, (const unsigned char *) (str), strlen(str) + 1)
+
+/*
+ * JumbleQueryInternal: Selectively serialize the query tree, appending
+ * significant data to the "query jumble" while ignoring nonsignificant data.
+ *
+ * Rule of thumb for what to include is that we should ignore anything not
+ * semantically significant (such as alias names) as well as anything that can
+ * be deduced from child nodes (else we'd just be double-hashing that piece
+ * of information).
+ */
+static void
+JumbleQueryInternal(JumbleState *jstate, Query *query)
+{
+	Assert(IsA(query, Query));
+	Assert(query->utilityStmt == NULL);
+
+	APP_JUMB(query->commandType);
+	/* resultRelation is usually predictable from commandType */
+	JumbleExpr(jstate, (Node *) query->cteList);
+	JumbleRangeTable(jstate, query->rtable);
+	JumbleExpr(jstate, (Node *) query->jointree);
+	JumbleExpr(jstate, (Node *) query->targetList);
+	JumbleExpr(jstate, (Node *) query->onConflict);
+	JumbleExpr(jstate, (Node *) query->returningList);
+	JumbleExpr(jstate, (Node *) query->groupClause);
+	JumbleExpr(jstate, (Node *) query->groupingSets);
+	JumbleExpr(jstate, query->havingQual);
+	JumbleExpr(jstate, (Node *) query->windowClause);
+	JumbleExpr(jstate, (Node *) query->distinctClause);
+	JumbleExpr(jstate, (Node *) query->sortClause);
+	JumbleExpr(jstate, query->limitOffset);
+	JumbleExpr(jstate, query->limitCount);
+	JumbleRowMarks(jstate, query->rowMarks);
+	JumbleExpr(jstate, query->setOperations);
+}
+
+/*
+ * Jumble a range table
+ */
+static void
+JumbleRangeTable(JumbleState *jstate, List *rtable)
+{
+	ListCell   *lc;
+
+	foreach(lc, rtable)
+	{
+		RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
+
+		APP_JUMB(rte->rtekind);
+		switch (rte->rtekind)
+		{
+			case RTE_RELATION:
+				APP_JUMB(rte->relid);
+				JumbleExpr(jstate, (Node *) rte->tablesample);
+				break;
+			case RTE_SUBQUERY:
+				JumbleQueryInternal(jstate, rte->subquery);
+				break;
+			case RTE_JOIN:
+				APP_JUMB(rte->jointype);
+				break;
+			case RTE_FUNCTION:
+				JumbleExpr(jstate, (Node *) rte->functions);
+				break;
+			case RTE_TABLEFUNC:
+				JumbleExpr(jstate, (Node *) rte->tablefunc);
+				break;
+			case RTE_VALUES:
+				JumbleExpr(jstate, (Node *) rte->values_lists);
+				break;
+			case RTE_CTE:
+
+				/*
+				 * Depending on the CTE name here isn't ideal, but it's the
+				 * only info we have to identify the referenced WITH item.
+				 */
+				APP_JUMB_STRING(rte->ctename);
+				APP_JUMB(rte->ctelevelsup);
+				break;
+			case RTE_NAMEDTUPLESTORE:
+				APP_JUMB_STRING(rte->enrname);
+				break;
+			case RTE_RESULT:
+				break;
+			default:
+				elog(ERROR, "unrecognized RTE kind: %d", (int) rte->rtekind);
+				break;
+		}
+	}
+}
+
+/*
+ * Jumble a rowMarks list
+ */
+static void
+JumbleRowMarks(JumbleState *jstate, List *rowMarks)
+{
+	ListCell   *lc;
+
+	foreach(lc, rowMarks)
+	{
+		RowMarkClause *rowmark = lfirst_node(RowMarkClause, lc);
+
+		if (!rowmark->pushedDown)
+		{
+			APP_JUMB(rowmark->rti);
+			APP_JUMB(rowmark->strength);
+			APP_JUMB(rowmark->waitPolicy);
+		}
+	}
+}
+
+/*
+ * Jumble an expression tree
+ *
+ * In general this function should handle all the same node types that
+ * expression_tree_walker() does, and therefore it's coded to be as parallel
+ * to that function as possible.  However, since we are only invoked on
+ * queries immediately post-parse-analysis, we need not handle node types
+ * that only appear in planning.
+ *
+ * Note: the reason we don't simply use expression_tree_walker() is that the
+ * point of that function is to support tree walkers that don't care about
+ * most tree node types, but here we care about all types.  We should complain
+ * about any unrecognized node type.
+ */
+static void
+JumbleExpr(JumbleState *jstate, Node *node)
+{
+	ListCell   *temp;
+
+	if (node == NULL)
+		return;
+
+	/* Guard against stack overflow due to overly complex expressions */
+	check_stack_depth();
+
+	/*
+	 * We always emit the node's NodeTag, then any additional fields that are
+	 * considered significant, and then we recurse to any child nodes.
+	 */
+	APP_JUMB(node->type);
+
+	switch (nodeTag(node))
+	{
+		case T_Var:
+			{
+				Var		   *var = (Var *) node;
+
+				APP_JUMB(var->varno);
+				APP_JUMB(var->varattno);
+				APP_JUMB(var->varlevelsup);
+			}
+			break;
+		case T_Const:
+			{
+				Const	   *c = (Const *) node;
+
+				/* We jumble only the constant's type, not its value */
+				APP_JUMB(c->consttype);
+				/* Also, record its parse location for query normalization */
+				RecordConstLocation(jstate, c->location);
+			}
+			break;
+		case T_Param:
+			{
+				Param	   *p = (Param *) node;
+
+				APP_JUMB(p->paramkind);
+				APP_JUMB(p->paramid);
+				APP_JUMB(p->paramtype);
+				/* Also, track the highest external Param id */
+				if (p->paramkind == PARAM_EXTERN &&
+					p->paramid > jstate->highest_extern_param_id)
+					jstate->highest_extern_param_id = p->paramid;
+			}
+			break;
+		case T_Aggref:
+			{
+				Aggref	   *expr = (Aggref *) node;
+
+				APP_JUMB(expr->aggfnoid);
+				JumbleExpr(jstate, (Node *) expr->aggdirectargs);
+				JumbleExpr(jstate, (Node *) expr->args);
+				JumbleExpr(jstate, (Node *) expr->aggorder);
+				JumbleExpr(jstate, (Node *) expr->aggdistinct);
+				JumbleExpr(jstate, (Node *) expr->aggfilter);
+			}
+			break;
+		case T_GroupingFunc:
+			{
+				GroupingFunc *grpnode = (GroupingFunc *) node;
+
+				JumbleExpr(jstate, (Node *) grpnode->refs);
+			}
+			break;
+		case T_WindowFunc:
+			{
+				WindowFunc *expr = (WindowFunc *) node;
+
+				APP_JUMB(expr->winfnoid);
+				APP_JUMB(expr->winref);
+				JumbleExpr(jstate, (Node *) expr->args);
+				JumbleExpr(jstate, (Node *) expr->aggfilter);
+			}
+			break;
+		case T_SubscriptingRef:
+			{
+				SubscriptingRef *sbsref = (SubscriptingRef *) node;
+
+				JumbleExpr(jstate, (Node *) sbsref->refupperindexpr);
+				JumbleExpr(jstate, (Node *) sbsref->reflowerindexpr);
+				JumbleExpr(jstate, (Node *) sbsref->refexpr);
+				JumbleExpr(jstate, (Node *) sbsref->refassgnexpr);
+			}
+			break;
+		case T_FuncExpr:
+			{
+				FuncExpr   *expr = (FuncExpr *) node;
+
+				APP_JUMB(expr->funcid);
+				JumbleExpr(jstate, (Node *) expr->args);
+			}
+			break;
+		case T_NamedArgExpr:
+			{
+				NamedArgExpr *nae = (NamedArgExpr *) node;
+
+				APP_JUMB(nae->argnumber);
+				JumbleExpr(jstate, (Node *) nae->arg);
+			}
+			break;
+		case T_OpExpr:
+		case T_DistinctExpr:	/* struct-equivalent to OpExpr */
+		case T_NullIfExpr:		/* struct-equivalent to OpExpr */
+			{
+				OpExpr	   *expr = (OpExpr *) node;
+
+				APP_JUMB(expr->opno);
+				JumbleExpr(jstate, (Node *) expr->args);
+			}
+			break;
+		case T_ScalarArrayOpExpr:
+			{
+				ScalarArrayOpExpr *expr = (ScalarArrayOpExpr *) node;
+
+				APP_JUMB(expr->opno);
+				APP_JUMB(expr->useOr);
+				JumbleExpr(jstate, (Node *) expr->args);
+			}
+			break;
+		case T_BoolExpr:
+			{
+				BoolExpr   *expr = (BoolExpr *) node;
+
+				APP_JUMB(expr->boolop);
+				JumbleExpr(jstate, (Node *) expr->args);
+			}
+			break;
+		case T_SubLink:
+			{
+				SubLink    *sublink = (SubLink *) node;
+
+				APP_JUMB(sublink->subLinkType);
+				APP_JUMB(sublink->subLinkId);
+				JumbleExpr(jstate, (Node *) sublink->testexpr);
+				JumbleQueryInternal(jstate, castNode(Query, sublink->subselect));
+			}
+			break;
+		case T_FieldSelect:
+			{
+				FieldSelect *fs = (FieldSelect *) node;
+
+				APP_JUMB(fs->fieldnum);
+				JumbleExpr(jstate, (Node *) fs->arg);
+			}
+			break;
+		case T_FieldStore:
+			{
+				FieldStore *fstore = (FieldStore *) node;
+
+				JumbleExpr(jstate, (Node *) fstore->arg);
+				JumbleExpr(jstate, (Node *) fstore->newvals);
+			}
+			break;
+		case T_RelabelType:
+			{
+				RelabelType *rt = (RelabelType *) node;
+
+				APP_JUMB(rt->resulttype);
+				JumbleExpr(jstate, (Node *) rt->arg);
+			}
+			break;
+		case T_CoerceViaIO:
+			{
+				CoerceViaIO *cio = (CoerceViaIO *) node;
+
+				APP_JUMB(cio->resulttype);
+				JumbleExpr(jstate, (Node *) cio->arg);
+			}
+			break;
+		case T_ArrayCoerceExpr:
+			{
+				ArrayCoerceExpr *acexpr = (ArrayCoerceExpr *) node;
+
+				APP_JUMB(acexpr->resulttype);
+				JumbleExpr(jstate, (Node *) acexpr->arg);
+				JumbleExpr(jstate, (Node *) acexpr->elemexpr);
+			}
+			break;
+		case T_ConvertRowtypeExpr:
+			{
+				ConvertRowtypeExpr *crexpr = (ConvertRowtypeExpr *) node;
+
+				APP_JUMB(crexpr->resulttype);
+				JumbleExpr(jstate, (Node *) crexpr->arg);
+			}
+			break;
+		case T_CollateExpr:
+			{
+				CollateExpr *ce = (CollateExpr *) node;
+
+				APP_JUMB(ce->collOid);
+				JumbleExpr(jstate, (Node *) ce->arg);
+			}
+			break;
+		case T_CaseExpr:
+			{
+				CaseExpr   *caseexpr = (CaseExpr *) node;
+
+				JumbleExpr(jstate, (Node *) caseexpr->arg);
+				foreach(temp, caseexpr->args)
+				{
+					CaseWhen   *when = lfirst_node(CaseWhen, temp);
+
+					JumbleExpr(jstate, (Node *) when->expr);
+					JumbleExpr(jstate, (Node *) when->result);
+				}
+				JumbleExpr(jstate, (Node *) caseexpr->defresult);
+			}
+			break;
+		case T_CaseTestExpr:
+			{
+				CaseTestExpr *ct = (CaseTestExpr *) node;
+
+				APP_JUMB(ct->typeId);
+			}
+			break;
+		case T_ArrayExpr:
+			JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements);
+			break;
+		case T_RowExpr:
+			JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args);
+			break;
+		case T_RowCompareExpr:
+			{
+				RowCompareExpr *rcexpr = (RowCompareExpr *) node;
+
+				APP_JUMB(rcexpr->rctype);
+				JumbleExpr(jstate, (Node *) rcexpr->largs);
+				JumbleExpr(jstate, (Node *) rcexpr->rargs);
+			}
+			break;
+		case T_CoalesceExpr:
+			JumbleExpr(jstate, (Node *) ((CoalesceExpr *) node)->args);
+			break;
+		case T_MinMaxExpr:
+			{
+				MinMaxExpr *mmexpr = (MinMaxExpr *) node;
+
+				APP_JUMB(mmexpr->op);
+				JumbleExpr(jstate, (Node *) mmexpr->args);
+			}
+			break;
+		case T_SQLValueFunction:
+			{
+				SQLValueFunction *svf = (SQLValueFunction *) node;
+
+				APP_JUMB(svf->op);
+				/* type is fully determined by op */
+				APP_JUMB(svf->typmod);
+			}
+			break;
+		case T_XmlExpr:
+			{
+				XmlExpr    *xexpr = (XmlExpr *) node;
+
+				APP_JUMB(xexpr->op);
+				JumbleExpr(jstate, (Node *) xexpr->named_args);
+				JumbleExpr(jstate, (Node *) xexpr->args);
+			}
+			break;
+		case T_NullTest:
+			{
+				NullTest   *nt = (NullTest *) node;
+
+				APP_JUMB(nt->nulltesttype);
+				JumbleExpr(jstate, (Node *) nt->arg);
+			}
+			break;
+		case T_BooleanTest:
+			{
+				BooleanTest *bt = (BooleanTest *) node;
+
+				APP_JUMB(bt->booltesttype);
+				JumbleExpr(jstate, (Node *) bt->arg);
+			}
+			break;
+		case T_CoerceToDomain:
+			{
+				CoerceToDomain *cd = (CoerceToDomain *) node;
+
+				APP_JUMB(cd->resulttype);
+				JumbleExpr(jstate, (Node *) cd->arg);
+			}
+			break;
+		case T_CoerceToDomainValue:
+			{
+				CoerceToDomainValue *cdv = (CoerceToDomainValue *) node;
+
+				APP_JUMB(cdv->typeId);
+			}
+			break;
+		case T_SetToDefault:
+			{
+				SetToDefault *sd = (SetToDefault *) node;
+
+				APP_JUMB(sd->typeId);
+			}
+			break;
+		case T_CurrentOfExpr:
+			{
+				CurrentOfExpr *ce = (CurrentOfExpr *) node;
+
+				APP_JUMB(ce->cvarno);
+				if (ce->cursor_name)
+					APP_JUMB_STRING(ce->cursor_name);
+				APP_JUMB(ce->cursor_param);
+			}
+			break;
+		case T_NextValueExpr:
+			{
+				NextValueExpr *nve = (NextValueExpr *) node;
+
+				APP_JUMB(nve->seqid);
+				APP_JUMB(nve->typeId);
+			}
+			break;
+		case T_InferenceElem:
+			{
+				InferenceElem *ie = (InferenceElem *) node;
+
+				APP_JUMB(ie->infercollid);
+				APP_JUMB(ie->inferopclass);
+				JumbleExpr(jstate, ie->expr);
+			}
+			break;
+		case T_TargetEntry:
+			{
+				TargetEntry *tle = (TargetEntry *) node;
+
+				APP_JUMB(tle->resno);
+				APP_JUMB(tle->ressortgroupref);
+				JumbleExpr(jstate, (Node *) tle->expr);
+			}
+			break;
+		case T_RangeTblRef:
+			{
+				RangeTblRef *rtr = (RangeTblRef *) node;
+
+				APP_JUMB(rtr->rtindex);
+			}
+			break;
+		case T_JoinExpr:
+			{
+				JoinExpr   *join = (JoinExpr *) node;
+
+				APP_JUMB(join->jointype);
+				APP_JUMB(join->isNatural);
+				APP_JUMB(join->rtindex);
+				JumbleExpr(jstate, join->larg);
+				JumbleExpr(jstate, join->rarg);
+				JumbleExpr(jstate, join->quals);
+			}
+			break;
+		case T_FromExpr:
+			{
+				FromExpr   *from = (FromExpr *) node;
+
+				JumbleExpr(jstate, (Node *) from->fromlist);
+				JumbleExpr(jstate, from->quals);
+			}
+			break;
+		case T_OnConflictExpr:
+			{
+				OnConflictExpr *conf = (OnConflictExpr *) node;
+
+				APP_JUMB(conf->action);
+				JumbleExpr(jstate, (Node *) conf->arbiterElems);
+				JumbleExpr(jstate, conf->arbiterWhere);
+				JumbleExpr(jstate, (Node *) conf->onConflictSet);
+				JumbleExpr(jstate, conf->onConflictWhere);
+				APP_JUMB(conf->constraint);
+				APP_JUMB(conf->exclRelIndex);
+				JumbleExpr(jstate, (Node *) conf->exclRelTlist);
+			}
+			break;
+		case T_List:
+			foreach(temp, (List *) node)
+			{
+				JumbleExpr(jstate, (Node *) lfirst(temp));
+			}
+			break;
+		case T_IntList:
+			foreach(temp, (List *) node)
+			{
+				APP_JUMB(lfirst_int(temp));
+			}
+			break;
+		case T_SortGroupClause:
+			{
+				SortGroupClause *sgc = (SortGroupClause *) node;
+
+				APP_JUMB(sgc->tleSortGroupRef);
+				APP_JUMB(sgc->eqop);
+				APP_JUMB(sgc->sortop);
+				APP_JUMB(sgc->nulls_first);
+			}
+			break;
+		case T_GroupingSet:
+			{
+				GroupingSet *gsnode = (GroupingSet *) node;
+
+				JumbleExpr(jstate, (Node *) gsnode->content);
+			}
+			break;
+		case T_WindowClause:
+			{
+				WindowClause *wc = (WindowClause *) node;
+
+				APP_JUMB(wc->winref);
+				APP_JUMB(wc->frameOptions);
+				JumbleExpr(jstate, (Node *) wc->partitionClause);
+				JumbleExpr(jstate, (Node *) wc->orderClause);
+				JumbleExpr(jstate, wc->startOffset);
+				JumbleExpr(jstate, wc->endOffset);
+			}
+			break;
+		case T_CommonTableExpr:
+			{
+				CommonTableExpr *cte = (CommonTableExpr *) node;
+
+				/* we store the string name because RTE_CTE RTEs need it */
+				APP_JUMB_STRING(cte->ctename);
+				APP_JUMB(cte->ctematerialized);
+				JumbleQueryInternal(jstate, castNode(Query, cte->ctequery));
+			}
+			break;
+		case T_SetOperationStmt:
+			{
+				SetOperationStmt *setop = (SetOperationStmt *) node;
+
+				APP_JUMB(setop->op);
+				APP_JUMB(setop->all);
+				JumbleExpr(jstate, setop->larg);
+				JumbleExpr(jstate, setop->rarg);
+			}
+			break;
+		case T_RangeTblFunction:
+			{
+				RangeTblFunction *rtfunc = (RangeTblFunction *) node;
+
+				JumbleExpr(jstate, rtfunc->funcexpr);
+			}
+			break;
+		case T_TableFunc:
+			{
+				TableFunc  *tablefunc = (TableFunc *) node;
+
+				JumbleExpr(jstate, tablefunc->docexpr);
+				JumbleExpr(jstate, tablefunc->rowexpr);
+				JumbleExpr(jstate, (Node *) tablefunc->colexprs);
+			}
+			break;
+		case T_TableSampleClause:
+			{
+				TableSampleClause *tsc = (TableSampleClause *) node;
+
+				APP_JUMB(tsc->tsmhandler);
+				JumbleExpr(jstate, (Node *) tsc->args);
+				JumbleExpr(jstate, (Node *) tsc->repeatable);
+			}
+			break;
+		default:
+			/* Only a warning, since we can stumble along anyway */
+			elog(WARNING, "unrecognized node type: %d",
+				 (int) nodeTag(node));
+			break;
+	}
+}
+
+/*
+ * Record location of constant within query string of query tree
+ * that is currently being walked.
+ */
+static void
+RecordConstLocation(JumbleState *jstate, int location)
+{
+	/* -1 indicates unknown or undefined location */
+	if (location >= 0)
+	{
+		/* enlarge array if needed */
+		if (jstate->clocations_count >= jstate->clocations_buf_size)
+		{
+			jstate->clocations_buf_size *= 2;
+			jstate->clocations = (LocationLen *)
+				repalloc(jstate->clocations,
+						 jstate->clocations_buf_size *
+						 sizeof(LocationLen));
+		}
+		jstate->clocations[jstate->clocations_count].location = location;
+		/* initialize lengths to -1 to simplify third-party module usage */
+		jstate->clocations[jstate->clocations_count].length = -1;
+		jstate->clocations_count++;
+	}
+}
diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h
index 4a3c9686f9..6716db6c13 100644
--- a/src/include/parser/analyze.h
+++ b/src/include/parser/analyze.h
@@ -15,10 +15,12 @@
 #define ANALYZE_H
 
 #include "parser/parse_node.h"
+#include "utils/queryjumble.h"
 
 /* Hook for plugins to get control at end of parse analysis */
 typedef void (*post_parse_analyze_hook_type) (ParseState *pstate,
-											  Query *query);
+											  Query *query,
+											  JumbleState *jstate);
 extern PGDLLIMPORT post_parse_analyze_hook_type post_parse_analyze_hook;
 
 
diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h
index 5004ee4177..9b6552b25b 100644
--- a/src/include/utils/guc.h
+++ b/src/include/utils/guc.h
@@ -248,6 +248,7 @@ extern bool log_btree_build_stats;
 extern PGDLLIMPORT bool check_function_bodies;
 extern bool session_auth_is_superuser;
 
+extern bool compute_query_id;
 extern bool log_duration;
 extern int	log_parameter_max_length;
 extern int	log_parameter_max_length_on_error;
diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h
new file mode 100644
index 0000000000..83ba7339fa
--- /dev/null
+++ b/src/include/utils/queryjumble.h
@@ -0,0 +1,58 @@
+/*-------------------------------------------------------------------------
+ *
+ * queryjumble.h
+ *	  Query normalization and fingerprinting.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/include/utils/queryjumble.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef QUERYJUBLE_H
+#define QUERYJUBLE_H
+
+#include "nodes/parsenodes.h"
+
+#define JUMBLE_SIZE				1024	/* query serialization buffer size */
+
+/*
+ * Struct for tracking locations/lengths of constants during normalization
+ */
+typedef struct LocationLen
+{
+	int			location;		/* start offset in query text */
+	int			length;			/* length in bytes, or -1 to ignore */
+} LocationLen;
+
+/*
+ * Working state for computing a query jumble and producing a normalized
+ * query string
+ */
+typedef struct JumbleState
+{
+	/* Jumble of current query tree */
+	unsigned char *jumble;
+
+	/* Number of bytes used in jumble[] */
+	Size		jumble_len;
+
+	/* Array of locations of constants that should be removed */
+	LocationLen *clocations;
+
+	/* Allocated length of clocations array */
+	int			clocations_buf_size;
+
+	/* Current number of valid entries in clocations array */
+	int			clocations_count;
+
+	/* highest Param id we've seen, in order to start normalization correctly */
+	int			highest_extern_param_id;
+} JumbleState;
+
+const char *CleanQuerytext(const char *query, int *location, int *len);
+JumbleState *JumbleQuery(Query *query, const char *querytext);
+
+#endif							/* QUERYJUMBLE_H */
-- 
2.30.1

>From e08c9d5fc86ba722844d97000798de868890aba3 Mon Sep 17 00:00:00 2001
From: Bruce Momjian <[email protected]>
Date: Mon, 22 Mar 2021 17:43:23 -0400
Subject: [PATCH v20 2/3] Expose queryid in pg_stat_activity and
 log_line_prefix

Similarly to other fields in pg_stat_activity, only the queryid from the top
level statements are exposed, and if the backends status isn't active then the
queryid from the last executed statements is displayed.

Also add a %Q placeholder to include the queryid in the log_line_prefix, which
will also only expose top level statements.
---
 .../pg_stat_statements/pg_stat_statements.c   | 112 +++++++-----------
 doc/src/sgml/config.sgml                      |  29 +++--
 doc/src/sgml/monitoring.sgml                  |  16 +++
 src/backend/catalog/system_views.sql          |   1 +
 src/backend/executor/execMain.c               |   9 ++
 src/backend/executor/execParallel.c           |  14 ++-
 src/backend/executor/nodeGather.c             |   3 +-
 src/backend/executor/nodeGatherMerge.c        |   4 +-
 src/backend/parser/analyze.c                  |   5 +
 src/backend/postmaster/pgstat.c               |  65 ++++++++++
 src/backend/tcop/postgres.c                   |   5 +
 src/backend/utils/adt/pgstatfuncs.c           |   7 +-
 src/backend/utils/error/elog.c                |   9 +-
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/backend/utils/misc/queryjumble.c          |  27 ++---
 src/include/catalog/pg_proc.dat               |   6 +-
 src/include/executor/execParallel.h           |   3 +-
 src/include/pgstat.h                          |   5 +
 src/test/regress/expected/rules.out           |   9 +-
 19 files changed, 222 insertions(+), 108 deletions(-)

diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index bd8c96728c..f62b9a2bfd 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -65,6 +65,7 @@
 #include "tcop/utility.h"
 #include "utils/acl.h"
 #include "utils/builtins.h"
+#include "utils/queryjumble.h"
 #include "utils/memutils.h"
 #include "utils/timestamp.h"
 
@@ -99,6 +100,14 @@ static const uint32 PGSS_PG_MAJOR_VERSION = PG_VERSION_NUM / 100;
 #define USAGE_DEALLOC_PERCENT	5	/* free this % of entries at once */
 #define IS_STICKY(c)	((c.calls[PGSS_PLAN] + c.calls[PGSS_EXEC]) == 0)
 
+/*
+ * Utility statements that pgss_ProcessUtility and pgss_post_parse_analyze
+ * ignores.
+ */
+#define PGSS_HANDLED_UTILITY(n)		(!IsA(n, ExecuteStmt) && \
+									!IsA(n, PrepareStmt) && \
+									!IsA(n, DeallocateStmt))
+
 /*
  * Extension version number, for supporting older extension versions' objects
  */
@@ -307,7 +316,6 @@ static void pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 								ProcessUtilityContext context, ParamListInfo params,
 								QueryEnvironment *queryEnv,
 								DestReceiver *dest, QueryCompletion *qc);
-static uint64 pgss_hash_string(const char *str, int len);
 static void pgss_store(const char *query, uint64 queryId,
 					   int query_location, int query_len,
 					   pgssStoreKind kind,
@@ -804,16 +812,14 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
 		return;
 
 	/*
-	 * Utility statements get queryId zero.  We do this even in cases where
-	 * the statement contains an optimizable statement for which a queryId
-	 * could be derived (such as EXPLAIN or DECLARE CURSOR).  For such cases,
-	 * runtime control will first go through ProcessUtility and then the
-	 * executor, and we don't want the executor hooks to do anything, since we
-	 * are already measuring the statement's costs at the utility level.
+	 * Clear queryId for prepared statements related utility, as those will
+	 * inherit from the underlying statement's one (except DEALLOCATE which is
+	 * entirely untracked).
 	 */
 	if (query->utilityStmt)
 	{
-		query->queryId = UINT64CONST(0);
+		if (pgss_track_utility && !PGSS_HANDLED_UTILITY(query->utilityStmt))
+			query->queryId = UINT64CONST(0);
 		return;
 	}
 
@@ -1055,6 +1061,23 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 					DestReceiver *dest, QueryCompletion *qc)
 {
 	Node	   *parsetree = pstmt->utilityStmt;
+	uint64		saved_queryId = pstmt->queryId;
+
+	/*
+	 * Force utility statements to get queryId zero.  We do this even in cases
+	 * where the statement contains an optimizable statement for which a
+	 * queryId could be derived (such as EXPLAIN or DECLARE CURSOR).  For such
+	 * cases, runtime control will first go through ProcessUtility and then the
+	 * executor, and we don't want the executor hooks to do anything, since we
+	 * are already measuring the statement's costs at the utility level.
+	 *
+	 * Note that this is only done if pg_stat_statements is enabled and
+	 * configured to track utility statements, in the unlikely possibility
+	 * that user configured another extension to handle utility statements
+	 * only.
+	 */
+	if (pgss_enabled(exec_nested_level) && pgss_track_utility)
+		pstmt->queryId = UINT64CONST(0);
 
 	/*
 	 * If it's an EXECUTE statement, we don't track it and don't increment the
@@ -1071,9 +1094,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 	 * Likewise, we don't track execution of DEALLOCATE.
 	 */
 	if (pgss_track_utility && pgss_enabled(exec_nested_level) &&
-		!IsA(parsetree, ExecuteStmt) &&
-		!IsA(parsetree, PrepareStmt) &&
-		!IsA(parsetree, DeallocateStmt))
+		PGSS_HANDLED_UTILITY(parsetree))
 	{
 		instr_time	start;
 		instr_time	duration;
@@ -1128,7 +1149,7 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 		WalUsageAccumDiff(&walusage, &pgWalUsage, &walusage_start);
 
 		pgss_store(queryString,
-				   0,			/* signal that it's a utility stmt */
+				   saved_queryId,
 				   pstmt->stmt_location,
 				   pstmt->stmt_len,
 				   PGSS_EXEC,
@@ -1151,23 +1172,12 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
 	}
 }
 
-/*
- * Given an arbitrarily long query string, produce a hash for the purposes of
- * identifying the query, without normalizing constants.  Used when hashing
- * utility statements.
- */
-static uint64
-pgss_hash_string(const char *str, int len)
-{
-	return DatumGetUInt64(hash_any_extended((const unsigned char *) str,
-											len, 0));
-}
-
 /*
  * Store some statistics for a statement.
  *
- * If queryId is 0 then this is a utility statement and we should compute
- * a suitable queryId internally.
+ * If queryId is 0 then this is a utility statement for which we couldn't
+ * compute a queryId during parse analysis, and we should compute a suitable
+ * queryId internally.
  *
  * If jstate is not NULL then we're trying to create an entry for which
  * we have no statistics as yet; we just want to record the normalized
@@ -1198,52 +1208,18 @@ pgss_store(const char *query, uint64 queryId,
 		return;
 
 	/*
-	 * Confine our attention to the relevant part of the string, if the query
-	 * is a portion of a multi-statement source string.
-	 *
-	 * First apply starting offset, unless it's -1 (unknown).
-	 */
-	if (query_location >= 0)
-	{
-		Assert(query_location <= strlen(query));
-		query += query_location;
-		/* Length of 0 (or -1) means "rest of string" */
-		if (query_len <= 0)
-			query_len = strlen(query);
-		else
-			Assert(query_len <= strlen(query));
-	}
-	else
-	{
-		/* If query location is unknown, distrust query_len as well */
-		query_location = 0;
-		query_len = strlen(query);
-	}
-
-	/*
-	 * Discard leading and trailing whitespace, too.  Use scanner_isspace()
-	 * not libc's isspace(), because we want to match the lexer's behavior.
+	 * Nothing to do if compute_query_id isn't enabled and no other module
+	 * computed a query identifier.
 	 */
-	while (query_len > 0 && scanner_isspace(query[0]))
-		query++, query_location++, query_len--;
-	while (query_len > 0 && scanner_isspace(query[query_len - 1]))
-		query_len--;
+	if (queryId == UINT64CONST(0))
+		return;
 
 	/*
-	 * For utility statements, we just hash the query string to get an ID.
+	 * Confine our attention to the relevant part of the string, if the query
+	 * is a portion of a multi-statement source string, and update query
+	 * location and length if needed.
 	 */
-	if (queryId == UINT64CONST(0))
-	{
-		queryId = pgss_hash_string(query, query_len);
-
-		/*
-		 * If we are unlucky enough to get a hash of zero(invalid), use
-		 * queryID as 2 instead, queryID 1 is already in use for normal
-		 * statements.
-		 */
-		if (queryId == UINT64CONST(0))
-			queryId = UINT64CONST(2);
-	}
+	query = CleanQuerytext(query, &query_location, &query_len);
 
 	/* Set up key for hashtable search */
 	key.userid = GetUserId();
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index d1aa746224..65ad8ca29e 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -6920,6 +6920,15 @@ local0.*    /var/log/postgresql
              session processes</entry>
              <entry>no</entry>
             </row>
+            <row>
+             <entry><literal>%Q</literal></entry>
+             <entry>query identifier of the current query.  Query
+             identifiers are not computed by default, so this field
+             will be zero unless <xref linkend="guc-compute-query-id"/>
+             parameter is enabled or a third-party module that computes
+             query identifiers is configured.</entry>
+             <entry>yes</entry>
+            </row>
             <row>
              <entry><literal>%%</literal></entry>
              <entry>Literal <literal>%</literal></entry>
@@ -7396,8 +7405,8 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       <listitem>
        <para>
         Enables the collection of information on the currently
-        executing command of each session, along with the time when
-        that command began execution. This parameter is on by
+        executing command of each session, along with its identifier and the
+        time when that command began execution. This parameter is on by
         default. Note that even when enabled, this information is not
         visible to all users, only to superusers and the user owning
         the session being reported on, so it should not represent a
@@ -7546,12 +7555,16 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </term>
       <listitem>
        <para>
-        Enables in-core computation of a query identifier.  The <xref
-        linkend="pgstatstatements"/> extension requires a query identifier
-        to be computed.  Note that an external module can alternatively
-        be used if the in-core query identifier computation method
-        isn't acceptable.  In this case, in-core computation should
-        remain disabled.  The default is <literal>off</literal>.
+        Enables in-core computation of a query identifier.
+        Query identifiers can be displayed in the <link
+        linkend="monitoring-pg-stat-activity-view"><structname>pg_stat_activity</structname></link>
+        view, or emitted in the log if configured via the <xref
+        linkend="guc-log-line-prefix"/> parameter.  The <xref
+        linkend="pgstatstatements"/> extension also requires a query
+        identifier to be computed.  Note that an external module can
+        alternatively be used if the in-core query identifier computation
+        specification isn't acceptable.  In this case, in-core computation
+        must be disabled.  The default is <literal>off</literal>.
        </para>
        <note>
         <para>
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 43c07da20e..4995ccfedb 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -910,6 +910,22 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       </para></entry>
      </row>
 
+    <row>
+     <entry role="catalog_table_entry"><para role="column_definition">
+      <structfield>queryid</structfield> <type>bigint</type>
+     </para>
+     <para>
+      Identifier of this backend's most recent query. If
+      <structfield>state</structfield> is <literal>active</literal> this
+      field shows the identifier of the currently executing query. In
+      all other states, it shows the identifier of last query that was
+      executed.  Query identifiers are not computed by default so this
+      field will be null unless <xref linkend="guc-compute-query-id"/>
+      parameter is enabled or a third-party module that computes query
+      identifiers is configured.
+     </para></entry>
+    </row>
+
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
        <structfield>query</structfield> <type>text</type>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 0dca65dc7b..012d86217f 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -764,6 +764,7 @@ CREATE VIEW pg_stat_activity AS
             S.state,
             S.backend_xid,
             s.backend_xmin,
+            S.queryid,
             S.query,
             S.backend_type
     FROM pg_stat_get_activity(NULL) AS S
diff --git a/src/backend/executor/execMain.c b/src/backend/executor/execMain.c
index c74ce36ffb..8c6c644211 100644
--- a/src/backend/executor/execMain.c
+++ b/src/backend/executor/execMain.c
@@ -54,6 +54,7 @@
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "parser/parsetree.h"
+#include "pgstat.h"
 #include "storage/bufmgr.h"
 #include "storage/lmgr.h"
 #include "tcop/utility.h"
@@ -128,6 +129,14 @@ static void EvalPlanQualStart(EPQState *epqstate, Plan *planTree);
 void
 ExecutorStart(QueryDesc *queryDesc, int eflags)
 {
+	/*
+	 * In some cases (e.g. an EXECUTE statement) a query execution will skip
+	 * parse analysis, which means that the queryid won't be reported.  Note
+	 * that it's harmless to report the queryid multiple time, as the call will
+	 * be ignored if the top level queryid has already been reported.
+	 */
+	pgstat_report_queryid(queryDesc->plannedstmt->queryId, false);
+
 	if (ExecutorStart_hook)
 		(*ExecutorStart_hook) (queryDesc, eflags);
 	else
diff --git a/src/backend/executor/execParallel.c b/src/backend/executor/execParallel.c
index c95d5170e4..26f1994a31 100644
--- a/src/backend/executor/execParallel.c
+++ b/src/backend/executor/execParallel.c
@@ -124,7 +124,7 @@ typedef struct ExecParallelInitializeDSMContext
 } ExecParallelInitializeDSMContext;
 
 /* Helper functions that run in the parallel leader. */
-static char *ExecSerializePlan(Plan *plan, EState *estate);
+static char *ExecSerializePlan(Plan *plan, EState *estate, uint64 queryId);
 static bool ExecParallelEstimate(PlanState *node,
 								 ExecParallelEstimateContext *e);
 static bool ExecParallelInitializeDSM(PlanState *node,
@@ -143,7 +143,7 @@ static DestReceiver *ExecParallelGetReceiver(dsm_segment *seg, shm_toc *toc);
  * Create a serialized representation of the plan to be sent to each worker.
  */
 static char *
-ExecSerializePlan(Plan *plan, EState *estate)
+ExecSerializePlan(Plan *plan, EState *estate, uint64 queryId)
 {
 	PlannedStmt *pstmt;
 	ListCell   *lc;
@@ -174,7 +174,7 @@ ExecSerializePlan(Plan *plan, EState *estate)
 	 */
 	pstmt = makeNode(PlannedStmt);
 	pstmt->commandType = CMD_SELECT;
-	pstmt->queryId = UINT64CONST(0);
+	pstmt->queryId = queryId;
 	pstmt->hasReturning = false;
 	pstmt->hasModifyingCTE = false;
 	pstmt->canSetTag = true;
@@ -578,7 +578,8 @@ ExecParallelSetupTupleQueues(ParallelContext *pcxt, bool reinitialize)
 ParallelExecutorInfo *
 ExecInitParallelPlan(PlanState *planstate, EState *estate,
 					 Bitmapset *sendParams, int nworkers,
-					 int64 tuples_needed)
+					 int64 tuples_needed,
+					 uint64 queryId)
 {
 	ParallelExecutorInfo *pei;
 	ParallelContext *pcxt;
@@ -620,7 +621,7 @@ ExecInitParallelPlan(PlanState *planstate, EState *estate,
 	pei->planstate = planstate;
 
 	/* Fix up and serialize plan to be sent to workers. */
-	pstmt_data = ExecSerializePlan(planstate->plan, estate);
+	pstmt_data = ExecSerializePlan(planstate->plan, estate, queryId);
 
 	/* Create a parallel context. */
 	pcxt = CreateParallelContext("postgres", "ParallelQueryMain", nworkers);
@@ -1403,8 +1404,9 @@ ParallelQueryMain(dsm_segment *seg, shm_toc *toc)
 	/* Setting debug_query_string for individual workers */
 	debug_query_string = queryDesc->sourceText;
 
-	/* Report workers' query for monitoring purposes */
+	/* Report workers' query and queryId for monitoring purposes */
 	pgstat_report_activity(STATE_RUNNING, debug_query_string);
+	pgstat_report_queryid(queryDesc->plannedstmt->queryId, false);
 
 	/* Attach to the dynamic shared memory area. */
 	area_space = shm_toc_lookup(toc, PARALLEL_KEY_DSA, false);
diff --git a/src/backend/executor/nodeGather.c b/src/backend/executor/nodeGather.c
index 9e1dc464cb..04c860f678 100644
--- a/src/backend/executor/nodeGather.c
+++ b/src/backend/executor/nodeGather.c
@@ -172,7 +172,8 @@ ExecGather(PlanState *pstate)
 												 estate,
 												 gather->initParam,
 												 gather->num_workers,
-												 node->tuples_needed);
+												 node->tuples_needed,
+												 pgstat_get_my_queryid());
 			else
 				ExecParallelReinitialize(node->ps.lefttree,
 										 node->pei,
diff --git a/src/backend/executor/nodeGatherMerge.c b/src/backend/executor/nodeGatherMerge.c
index aa5743cebf..32f74e8c23 100644
--- a/src/backend/executor/nodeGatherMerge.c
+++ b/src/backend/executor/nodeGatherMerge.c
@@ -24,6 +24,7 @@
 #include "lib/binaryheap.h"
 #include "miscadmin.h"
 #include "optimizer/optimizer.h"
+#include "pgstat.h"
 #include "utils/memutils.h"
 #include "utils/rel.h"
 
@@ -216,7 +217,8 @@ ExecGatherMerge(PlanState *pstate)
 												 estate,
 												 gm->initParam,
 												 gm->num_workers,
-												 node->tuples_needed);
+												 node->tuples_needed,
+												 pgstat_get_my_queryid());
 			else
 				ExecParallelReinitialize(node->ps.lefttree,
 										 node->pei,
diff --git a/src/backend/parser/analyze.c b/src/backend/parser/analyze.c
index c565c80365..d125ef7f98 100644
--- a/src/backend/parser/analyze.c
+++ b/src/backend/parser/analyze.c
@@ -44,6 +44,7 @@
 #include "parser/parse_target.h"
 #include "parser/parse_type.h"
 #include "parser/parsetree.h"
+#include "pgstat.h"
 #include "rewrite/rewriteManip.h"
 #include "utils/builtins.h"
 #include "utils/guc.h"
@@ -130,6 +131,8 @@ parse_analyze(RawStmt *parseTree, const char *sourceText,
 
 	free_parsestate(pstate);
 
+	pgstat_report_queryid(query->queryId, false);
+
 	return query;
 }
 
@@ -167,6 +170,8 @@ parse_analyze_varparams(RawStmt *parseTree, const char *sourceText,
 
 	free_parsestate(pstate);
 
+	pgstat_report_queryid(query->queryId, false);
+
 	return query;
 }
 
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 60f45ccc4e..b57deb42d7 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3381,6 +3381,7 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 			beentry->st_activity_start_timestamp = 0;
 			/* st_xact_start_timestamp and wait_event_info are also disabled */
 			beentry->st_xact_start_timestamp = 0;
+			beentry->st_queryid = 0;
 			proc->wait_event_info = 0;
 			PGSTAT_END_WRITE_ACTIVITY(beentry);
 		}
@@ -3435,6 +3436,14 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 	beentry->st_state = state;
 	beentry->st_state_start_timestamp = current_timestamp;
 
+	/*
+	 * If a new query is started, we reset the query identifier as it'll only
+	 * be known after parse analysis, to avoid reporting last query's
+	 * identifier.
+	 */
+	if (state == STATE_RUNNING)
+		beentry->st_queryid = 0;
+
 	if (cmd_str != NULL)
 	{
 		memcpy((char *) beentry->st_activity_raw, cmd_str, len);
@@ -3445,6 +3454,48 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 	PGSTAT_END_WRITE_ACTIVITY(beentry);
 }
 
+/* --------
+ * pgstat_report_queryid() -
+ *
+ *	Called to update top-level query identifier.
+ * --------
+ */
+void
+pgstat_report_queryid(uint64 queryId, bool force)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+
+	if (!beentry)
+		return;
+
+	/*
+	 * if track_activities is disabled, st_queryid should already have been
+	 * reset
+	 */
+	if (!pgstat_track_activities)
+		return;
+
+	/*
+	 * We only report the top-level query identifiers.  The stored queryid is
+	 * reset when a backend calls pgstat_report_activity(STATE_RUNNING), or
+	 * with an explicit call to this function using the force flag.  If the
+	 * saved query identifier is not zero it means that it's not a top-level
+	 * command, so ignore the one provided unless it's an explicit call to
+	 * reset the identifier.
+	 */
+	if (beentry->st_queryid != 0 && !force)
+		return;
+
+	/*
+	 * Update my status entry, following the protocol of bumping
+	 * st_changecount before and after.  We use a volatile pointer here to
+	 * ensure the compiler doesn't try to get cute.
+	 */
+	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
+	beentry->st_queryid = queryId;
+	PGSTAT_END_WRITE_ACTIVITY(beentry);
+}
+
 /*-----------
  * pgstat_progress_start_command() -
  *
@@ -5178,6 +5229,20 @@ pgstat_get_db_entry(Oid databaseid, bool create)
 	return result;
 }
 
+/* ----------
+ * pgstat_get_my_queryid() -
+ *
+ *	Return current backend's query identifier.
+ */
+uint64
+pgstat_get_my_queryid(void)
+{
+	if (!MyBEEntry)
+		return 0;
+
+	return MyBEEntry->st_queryid;
+}
+
 
 /*
  * Lookup the hash table entry for the specified table. If no hash
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 7e034b72b1..d66cee79f0 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -692,6 +692,8 @@ pg_analyze_and_rewrite_params(RawStmt *parsetree,
 
 	free_parsestate(pstate);
 
+	pgstat_report_queryid(query->queryId, false);
+
 	if (log_parser_stats)
 		ShowUsage("PARSE ANALYSIS STATISTICS");
 
@@ -910,6 +912,7 @@ pg_plan_queries(List *querytrees, const char *query_string, int cursorOptions,
 			stmt->utilityStmt = query->utilityStmt;
 			stmt->stmt_location = query->stmt_location;
 			stmt->stmt_len = query->stmt_len;
+			stmt->queryId = query->queryId;
 		}
 		else
 		{
@@ -1026,6 +1029,8 @@ exec_simple_query(const char *query_string)
 		DestReceiver *receiver;
 		int16		format;
 
+		pgstat_report_queryid(0, true);
+
 		/*
 		 * Get the command name for use in status display (it also becomes the
 		 * default completion tag, down inside PortalRun).  Set ps_status and
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 5102227a60..8e81eef8cb 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -569,7 +569,7 @@ pg_stat_get_progress_info(PG_FUNCTION_ARGS)
 Datum
 pg_stat_get_activity(PG_FUNCTION_ARGS)
 {
-#define PG_STAT_GET_ACTIVITY_COLS	29
+#define PG_STAT_GET_ACTIVITY_COLS	30
 	int			num_backends = pgstat_fetch_stat_numbackends();
 	int			curr_backend;
 	int			pid = PG_ARGISNULL(0) ? -1 : PG_GETARG_INT32(0);
@@ -914,6 +914,10 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 				values[27] = BoolGetDatum(false);	/* GSS Encryption not in
 													 * use */
 			}
+			if (beentry->st_queryid == 0)
+				nulls[29] = true;
+			else
+				values[29] = DatumGetUInt64(beentry->st_queryid);
 		}
 		else
 		{
@@ -941,6 +945,7 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 			nulls[26] = true;
 			nulls[27] = true;
 			nulls[28] = true;
+			nulls[29] = true;
 		}
 
 		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
diff --git a/src/backend/utils/error/elog.c b/src/backend/utils/error/elog.c
index e729ebece7..7aa484c5ed 100644
--- a/src/backend/utils/error/elog.c
+++ b/src/backend/utils/error/elog.c
@@ -77,7 +77,6 @@
 #include "postmaster/postmaster.h"
 #include "postmaster/syslogger.h"
 #include "storage/ipc.h"
-#include "storage/proc.h"
 #include "tcop/tcopprot.h"
 #include "utils/guc.h"
 #include "utils/memutils.h"
@@ -2685,6 +2684,14 @@ log_line_prefix(StringInfo buf, ErrorData *edata)
 				else
 					appendStringInfoString(buf, unpack_sql_state(edata->sqlerrcode));
 				break;
+			case 'Q':
+				if (padding != 0)
+					appendStringInfo(buf, "%*ld", padding,
+							pgstat_get_my_queryid());
+				else
+					appendStringInfo(buf, "%ld",
+							pgstat_get_my_queryid());
+				break;
 			default:
 				/* format error - ignore it */
 				break;
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index aa34c99f0c..d702ea24b6 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -541,6 +541,7 @@
 					#   %t = timestamp without milliseconds
 					#   %m = timestamp with milliseconds
 					#   %n = timestamp with milliseconds (as a Unix epoch)
+					#   %Q = query ID (0 if none or not computed)
 					#   %i = command tag
 					#   %e = SQL state
 					#   %c = session ID
diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c
index 2a47688fd6..53286bb333 100644
--- a/src/backend/utils/misc/queryjumble.c
+++ b/src/backend/utils/misc/queryjumble.c
@@ -39,7 +39,7 @@
 
 #define JUMBLE_SIZE				1024	/* query serialization buffer size */
 
-static uint64 compute_utility_queryid(const char *str, int query_len);
+static uint64 compute_utility_queryid(const char *str, int query_location, int query_len);
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *item, Size size);
 static void JumbleQueryInternal(JumbleState *jstate, Query *query);
@@ -97,17 +97,9 @@ JumbleQuery(Query *query, const char *querytext)
 	JumbleState *jstate = NULL;
 	if (query->utilityStmt)
 	{
-		const char *sql;
-		int query_location = query->stmt_location;
-		int query_len = query->stmt_len;
-
-		/*
-		 * Confine our attention to the relevant part of the string, if the
-		 * query is a portion of a multi-statement source string.
-		 */
-		sql = CleanQuerytext(querytext, &query_location, &query_len);
-
-		query->queryId = compute_utility_queryid(sql, query_len);
+		query->queryId = compute_utility_queryid(querytext,
+												 query->stmt_location,
+												 query->stmt_len);
 	}
 	else
 	{
@@ -143,11 +135,18 @@ JumbleQuery(Query *query, const char *querytext)
  * Compute a query identifier for the given utility query string.
  */
 static uint64
-compute_utility_queryid(const char *str, int query_len)
+compute_utility_queryid(const char *query_text, int query_location, int query_len)
 {
 	uint64 queryId;
+	const char *sql;
+
+	/*
+	 * Confine our attention to the relevant part of the string, if the
+	 * query is a portion of a multi-statement source string.
+	 */
+	sql = CleanQuerytext(query_text, &query_location, &query_len);
 
-	queryId = DatumGetUInt64(hash_any_extended((const unsigned char *) str,
+	queryId = DatumGetUInt64(hash_any_extended((const unsigned char *) sql,
 											   query_len, 0));
 
 	/*
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 604ac564b3..5cebdd4379 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5255,9 +5255,9 @@
   proname => 'pg_stat_get_activity', prorows => '100', proisstrict => 'f',
   proretset => 't', provolatile => 's', proparallel => 'r',
   prorettype => 'record', proargtypes => 'int4',
-  proallargtypes => '{int4,oid,int4,oid,text,text,text,text,text,timestamptz,timestamptz,timestamptz,timestamptz,inet,text,int4,xid,xid,text,bool,text,text,int4,text,numeric,text,bool,text,bool,int4}',
-  proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
-  proargnames => '{pid,datid,pid,usesysid,application_name,state,query,wait_event_type,wait_event,xact_start,query_start,backend_start,state_change,client_addr,client_hostname,client_port,backend_xid,backend_xmin,backend_type,ssl,sslversion,sslcipher,sslbits,ssl_client_dn,ssl_client_serial,ssl_issuer_dn,gss_auth,gss_princ,gss_enc,leader_pid}',
+  proallargtypes => '{int4,oid,int4,oid,text,text,text,text,text,timestamptz,timestamptz,timestamptz,timestamptz,inet,text,int4,xid,xid,text,bool,text,text,int4,text,numeric,text,bool,text,bool,int4,int8}',
+  proargmodes => '{i,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
+  proargnames => '{pid,datid,pid,usesysid,application_name,state,query,wait_event_type,wait_event,xact_start,query_start,backend_start,state_change,client_addr,client_hostname,client_port,backend_xid,backend_xmin,backend_type,ssl,sslversion,sslcipher,sslbits,ssl_client_dn,ssl_client_serial,ssl_issuer_dn,gss_auth,gss_princ,gss_enc,leader_pid,queryid}',
   prosrc => 'pg_stat_get_activity' },
 { oid => '3318',
   descr => 'statistics: information about progress of backends running maintenance command',
diff --git a/src/include/executor/execParallel.h b/src/include/executor/execParallel.h
index 3888175a2f..e0e08e0b27 100644
--- a/src/include/executor/execParallel.h
+++ b/src/include/executor/execParallel.h
@@ -39,7 +39,8 @@ typedef struct ParallelExecutorInfo
 
 extern ParallelExecutorInfo *ExecInitParallelPlan(PlanState *planstate,
 												  EState *estate, Bitmapset *sendParam, int nworkers,
-												  int64 tuples_needed);
+												  int64 tuples_needed,
+												  uint64 queryId);
 extern void ExecParallelCreateReaders(ParallelExecutorInfo *pei);
 extern void ExecParallelFinish(ParallelExecutorInfo *pei);
 extern void ExecParallelCleanup(ParallelExecutorInfo *pei);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 87672e6f30..6966696191 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -1263,6 +1263,9 @@ typedef struct PgBackendStatus
 	ProgressCommandType st_progress_command;
 	Oid			st_progress_command_target;
 	int64		st_progress_param[PGSTAT_NUM_PROGRESS_PARAM];
+
+	/* query identifier, optionally computed using post_parse_analyze_hook */
+	uint64		st_queryid;
 } PgBackendStatus;
 
 /*
@@ -1457,6 +1460,7 @@ extern void pgstat_initialize(void);
 extern void pgstat_bestart(void);
 
 extern void pgstat_report_activity(BackendState state, const char *cmd_str);
+extern void pgstat_report_queryid(uint64 queryId, bool force);
 extern void pgstat_report_tempfile(size_t filesize);
 extern void pgstat_report_appname(const char *appname);
 extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
@@ -1465,6 +1469,7 @@ extern const char *pgstat_get_wait_event_type(uint32 wait_event_info);
 extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
 extern const char *pgstat_get_crashed_backend_activity(int pid, char *buffer,
 													   int buflen);
+extern uint64 pgstat_get_my_queryid(void);
 
 extern void pgstat_progress_start_command(ProgressCommandType cmdtype,
 										  Oid relid);
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 9b12cc122a..ff3506d5d7 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1762,9 +1762,10 @@ pg_stat_activity| SELECT s.datid,
     s.state,
     s.backend_xid,
     s.backend_xmin,
+    s.queryid,
     s.query,
     s.backend_type
-   FROM ((pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid)
+   FROM ((pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid, queryid)
      LEFT JOIN pg_database d ON ((s.datid = d.oid)))
      LEFT JOIN pg_authid u ON ((s.usesysid = u.oid)));
 pg_stat_all_indexes| SELECT c.oid AS relid,
@@ -1876,7 +1877,7 @@ pg_stat_gssapi| SELECT s.pid,
     s.gss_auth AS gss_authenticated,
     s.gss_princ AS principal,
     s.gss_enc AS encrypted
-   FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid)
+   FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid, queryid)
   WHERE (s.client_port IS NOT NULL);
 pg_stat_progress_analyze| SELECT s.pid,
     s.datid,
@@ -2046,7 +2047,7 @@ pg_stat_replication| SELECT s.pid,
     w.sync_priority,
     w.sync_state,
     w.reply_time
-   FROM ((pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid)
+   FROM ((pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid, queryid)
      JOIN pg_stat_get_wal_senders() w(pid, state, sent_lsn, write_lsn, flush_lsn, replay_lsn, write_lag, flush_lag, replay_lag, sync_priority, sync_state, reply_time) ON ((s.pid = w.pid)))
      LEFT JOIN pg_authid u ON ((s.usesysid = u.oid)));
 pg_stat_replication_slots| SELECT s.slot_name,
@@ -2076,7 +2077,7 @@ pg_stat_ssl| SELECT s.pid,
     s.ssl_client_dn AS client_dn,
     s.ssl_client_serial AS client_serial,
     s.ssl_issuer_dn AS issuer_dn
-   FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid)
+   FROM pg_stat_get_activity(NULL::integer) s(datid, pid, usesysid, application_name, state, query, wait_event_type, wait_event, xact_start, query_start, backend_start, state_change, client_addr, client_hostname, client_port, backend_xid, backend_xmin, backend_type, ssl, sslversion, sslcipher, sslbits, ssl_client_dn, ssl_client_serial, ssl_issuer_dn, gss_auth, gss_princ, gss_enc, leader_pid, queryid)
   WHERE (s.client_port IS NOT NULL);
 pg_stat_subscription| SELECT su.oid AS subid,
     su.subname,
-- 
2.30.1

>From 730eac44d5ea4dc539f734e2bc672316cb75ae80 Mon Sep 17 00:00:00 2001
From: Bruce Momjian <[email protected]>
Date: Mon, 22 Mar 2021 17:43:24 -0400
Subject: [PATCH v20 3/3] Expose query identifier in verbose explain

If a query identifier has been computed, either by enabling compute_query_id or
using a third-party module, verbose explain will display it.
---
 doc/src/sgml/config.sgml              |  6 +++---
 doc/src/sgml/ref/explain.sgml         |  6 ++++--
 src/backend/commands/explain.c        | 18 ++++++++++++++++++
 src/test/regress/expected/explain.out | 11 ++++++++++-
 src/test/regress/sql/explain.sql      |  5 ++++-
 5 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 65ad8ca29e..e6d9046042 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -7558,9 +7558,9 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
         Enables in-core computation of a query identifier.
         Query identifiers can be displayed in the <link
         linkend="monitoring-pg-stat-activity-view"><structname>pg_stat_activity</structname></link>
-        view, or emitted in the log if configured via the <xref
-        linkend="guc-log-line-prefix"/> parameter.  The <xref
-        linkend="pgstatstatements"/> extension also requires a query
+        view, using <command>EXPLAIN</command>, or emitted in the log if
+        configured via the <xref linkend="guc-log-line-prefix"/> parameter.
+        The <xref linkend="pgstatstatements"/> extension also requires a query
         identifier to be computed.  Note that an external module can
         alternatively be used if the in-core query identifier computation
         specification isn't acceptable.  In this case, in-core computation
diff --git a/doc/src/sgml/ref/explain.sgml b/doc/src/sgml/ref/explain.sgml
index c4512332a0..4d758fb237 100644
--- a/doc/src/sgml/ref/explain.sgml
+++ b/doc/src/sgml/ref/explain.sgml
@@ -136,8 +136,10 @@ ROLLBACK;
       the output column list for each node in the plan tree, schema-qualify
       table and function names, always label variables in expressions with
       their range table alias, and always print the name of each trigger for
-      which statistics are displayed.  This parameter defaults to
-      <literal>FALSE</literal>.
+      which statistics are displayed.  The query identifier will also be
+      displayed if one has been computed, see <xref
+      linkend="guc-compute-query-id"/> for more details.  This parameter
+      defaults to <literal>FALSE</literal>.
      </para>
     </listitem>
    </varlistentry>
diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index afc45429ba..9794c4e794 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -24,6 +24,7 @@
 #include "nodes/extensible.h"
 #include "nodes/makefuncs.h"
 #include "nodes/nodeFuncs.h"
+#include "parser/analyze.h"
 #include "parser/parsetree.h"
 #include "rewrite/rewriteHandler.h"
 #include "storage/bufmgr.h"
@@ -163,6 +164,8 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 {
 	ExplainState *es = NewExplainState();
 	TupOutputState *tstate;
+	JumbleState *jstate = NULL;
+	Query		*query;
 	List	   *rewritten;
 	ListCell   *lc;
 	bool		timing_set = false;
@@ -239,6 +242,13 @@ ExplainQuery(ParseState *pstate, ExplainStmt *stmt,
 	/* if the summary was not set explicitly, set default value */
 	es->summary = (summary_set) ? es->summary : es->analyze;
 
+	query = castNode(Query, stmt->query);
+	if (compute_query_id)
+		jstate = JumbleQuery(query, pstate->p_sourcetext);
+
+	if (post_parse_analyze_hook)
+		(*post_parse_analyze_hook) (pstate, query, jstate);
+
 	/*
 	 * Parse analysis was done already, but we still have to run the rule
 	 * rewriter.  We do not do AcquireRewriteLocks: we assume the query either
@@ -598,6 +608,14 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 	/* Create textual dump of plan tree */
 	ExplainPrintPlan(es, queryDesc);
 
+	if (es->verbose && plannedstmt->queryId != UINT64CONST(0))
+	{
+		char	buf[MAXINT8LEN+1];
+
+		pg_lltoa(plannedstmt->queryId, buf);
+		ExplainPropertyText("Query Identifier", buf, es);
+	}
+
 	/* Show buffer usage in planning */
 	if (bufusage)
 	{
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 791eba8511..1f8a3ead52 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -17,7 +17,7 @@ begin
     for ln in execute $1
     loop
         -- Replace any numeric word with just 'N'
-        ln := regexp_replace(ln, '\m\d+\M', 'N', 'g');
+        ln := regexp_replace(ln, '-?\m\d+\M', 'N', 'g');
         -- In sort output, the above won't match units-suffixed numbers
         ln := regexp_replace(ln, '\m\d+kB', 'NkB', 'g');
         -- Ignore text-mode buffers output because it varies depending
@@ -470,3 +470,12 @@ select jsonb_pretty(
 (1 row)
 
 rollback;
+set compute_query_id = on;
+select explain_filter('explain (verbose) select 1');
+             explain_filter             
+----------------------------------------
+ Result  (cost=N.N..N.N rows=N width=N)
+   Output: N
+ Query Identifier: N
+(3 rows)
+
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index f2eab030d6..468caf4037 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -19,7 +19,7 @@ begin
     for ln in execute $1
     loop
         -- Replace any numeric word with just 'N'
-        ln := regexp_replace(ln, '\m\d+\M', 'N', 'g');
+        ln := regexp_replace(ln, '-?\m\d+\M', 'N', 'g');
         -- In sort output, the above won't match units-suffixed numbers
         ln := regexp_replace(ln, '\m\d+kB', 'NkB', 'g');
         -- Ignore text-mode buffers output because it varies depending
@@ -103,3 +103,6 @@ select jsonb_pretty(
 );
 
 rollback;
+
+set compute_query_id = on;
+select explain_filter('explain (verbose) select 1');
-- 
2.30.1

Re: Feature improvement: can we add queryId for pg_catalog.pg_stat_activity view?

Reply via email to