> On Thu, Feb 02, 2023 at 04:05:54PM +0100, Dmitry Dolgov wrote:
> > On Thu, Feb 02, 2023 at 03:07:27PM +0100, Alvaro Herrera wrote:
> > This appears to have massive conflicts.  Would you please rebase?
>
> Sure, I was already mentally preparing myself to do so in the view of
> recent changes in query jumbling. Will post soon.

Here is the rebased version. To adapt to the latest changes, I've marked
ArrayExpr with custom_query_jumble to implement this functionality, but
tried to make the actual merge logic relatively independent. Otherwise,
everything is the same.
>From e72f6b8990dace82667d46b3578062bee92af472 Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthali...@gmail.com>
Date: Sun, 24 Jul 2022 11:43:25 +0200
Subject: [PATCH v12] Prevent jumbling of every element in ArrayExpr

pg_stat_statements produces multiple entries for queries like

    SELECT something FROM table WHERE col IN (1, 2, 3, ...)

depending on number of parameters, because every element of ArrayExpr is
jumbled. In certain situations it's undesirable, especially if the list
becomes too large.

Make Const expressions contribute nothing to the jumble hash if they're
a part of an ArrayExpr, which length is larger than specified threshold.
Allow to configure the threshold via the new GUC const_merge_threshold
with the default value zero, which disables this feature.

Reviewed-by: Zhihong Yu, Sergey Dudoladov, Robert Haas, Tom Lane
Tested-by: Chengxi Sun
---
 .../expected/pg_stat_statements.out           | 412 ++++++++++++++++++
 .../pg_stat_statements/pg_stat_statements.c   |  33 +-
 .../sql/pg_stat_statements.sql                | 107 +++++
 doc/src/sgml/config.sgml                      |  26 ++
 doc/src/sgml/pgstatstatements.sgml            |  28 +-
 src/backend/nodes/gen_node_support.pl         |   2 +-
 src/backend/nodes/queryjumblefuncs.c          | 102 ++++-
 src/backend/utils/misc/guc_tables.c           |  13 +
 src/backend/utils/misc/postgresql.conf.sample |   2 +-
 src/include/nodes/primnodes.h                 |   2 +
 src/include/nodes/queryjumble.h               |   5 +-
 11 files changed, 712 insertions(+), 20 deletions(-)

diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out
index fb9ccd920f..9acdb55c9a 100644
--- a/contrib/pg_stat_statements/expected/pg_stat_statements.out
+++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out
@@ -1142,4 +1142,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
      2
 (1 row)
 
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+-- IN queries
+-- No merging
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                     query                                      | calls 
+--------------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6)                  |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7)              |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8)          |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9)      |     1
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) |     1
+ SELECT pg_stat_statements_reset()                                              |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"         |     0
+(7 rows)
+
+-- Normal
+SET const_merge_threshold = 5;
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3)                      |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3)                      |     1
+ SELECT * FROM test_merge WHERE id IN (...)                             |     5
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT * FROM test_merge WHERE id IN (...)                             |     6
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     6
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+ id | data 
+----+------
+(0 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     5
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(3 rows)
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4)                  |     1
+ SELECT * FROM test_merge WHERE id IN (...)                             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(4 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     2
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(3 rows)
+
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...)                             |     2
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     1
+(3 rows)
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge WHERE id IN (...) and data = $3               |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+ pg_stat_statements_reset 
+--------------------------
+ 
+(1 row)
+
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+ id | data 
+----+------
+(0 rows)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+                                 query                                  | calls 
+------------------------------------------------------------------------+-------
+ SELECT * FROM test_merge_numeric WHERE id IN (...)                     |     1
+ SELECT pg_stat_statements_reset()                                      |     1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" |     0
+(3 rows)
+
+-- Test find_const_walker
+WITH cte AS (
+    SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+ result 
+--------
+(0 rows)
+
+RESET const_merge_threshold;
 DROP EXTENSION pg_stat_statements;
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index ad1fe44496..b26ae1f234 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -2666,6 +2666,9 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 				n_quer_loc = 0, /* Normalized query byte location */
 				last_off = 0,	/* Offset from start for previous tok */
 				last_tok_len = 0;	/* Length (in bytes) of that tok */
+	bool		skip = false; 	/* Signals that certain constants are
+								   merged together and have to be skipped */
+
 
 	/*
 	 * Get constants' lengths (core system only gives us locations).  Note
@@ -2689,7 +2692,6 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 	{
 		int			off,		/* Offset from start for cur tok */
 					tok_len;	/* Length (in bytes) of that tok */
-
 		off = jstate->clocations[i].location;
 		/* Adjust recorded location if we're dealing with partial string */
 		off -= query_loc;
@@ -2704,12 +2706,31 @@ generate_normalized_query(JumbleState *jstate, const char *query,
 		len_to_wrt -= last_tok_len;
 
 		Assert(len_to_wrt >= 0);
-		memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
-		n_quer_loc += len_to_wrt;
 
-		/* And insert a param symbol in place of the constant token */
-		n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
-							  i + 1 + jstate->highest_extern_param_id);
+		/* Normal path, non merged constant */
+		if (!jstate->clocations[i].merged)
+		{
+			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+			n_quer_loc += len_to_wrt;
+
+			/* And insert a param symbol in place of the constant token */
+			n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d",
+								  i + 1 + jstate->highest_extern_param_id);
+
+			/* In case previous constants were merged away, stop doing that */
+			if (skip)
+				skip = false;
+		}
+		/* The firsts merged constant */
+		else if (!skip)
+		{
+			memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt);
+			n_quer_loc += len_to_wrt;
+
+			/* Skip the following until a non merged constant appear */
+			skip = true;
+			n_quer_loc += sprintf(norm_query + n_quer_loc, "...");
+		}
 
 		quer_loc = off + tok_len;
 		last_off = off;
diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
index b82cddf16f..f561edbe83 100644
--- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql
+++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql
@@ -465,4 +465,111 @@ SELECT (
 
 SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%';
 
+--
+-- Consts merging
+--
+CREATE TABLE test_merge (id int, data int);
+
+-- IN queries
+
+-- No merging
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Normal
+SET const_merge_threshold = 5;
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On the merge threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- With gaps on the threshold
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- test constants after merge
+SELECT pg_stat_statements_reset();
+
+SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2;
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- On table, numeric type causes every constant being wrapped into functions.
+CREATE TABLE test_merge_numeric (id int, data numeric(5, 2));
+SELECT pg_stat_statements_reset();
+SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+
+-- Test find_const_walker
+WITH cte AS (
+    SELECT 'const' as const FROM test_merge
+)
+SELECT ARRAY['a', 'b', 'c', const::varchar] AS result
+FROM cte;
+
+RESET const_merge_threshold;
+
 DROP EXTENSION pg_stat_statements;
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index d190be1925..501a132a4a 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -8278,6 +8278,32 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-const-merge-threshold" xreflabel="const_merge_treshold">
+      <term><varname>const_merge_threshold</varname> (<type>integer</type>)
+      <indexterm>
+       <primary><varname>const_merge_threshold</varname> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        Specifies the minimal length of an array to be eligible for constants
+        collapsing.  Normally every element of an array contributes to a query
+        identifier, which means the same query containing an array of constants
+        could get multiple different identifiers, depending of size of the
+        array.  If this parameter is nonzero, the array contains only constants
+        and it's length is larger than <varname> const_merge_threshold </varname>,
+        then array elements will contribute nothing to the query identifier.
+        Thus the query will get the same identifier no matter how many constants
+        it contains.
+
+        Zero turns off collapsing, and it is the default value.
+
+        The <xref linkend="pgstatstatements"/> extension will represent such
+        collapsed constants via <literal>'(...)'</literal>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
 
     </sect2>
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
index efc36da602..f7e2e9fe85 100644
--- a/doc/src/sgml/pgstatstatements.sgml
+++ b/doc/src/sgml/pgstatstatements.sgml
@@ -519,10 +519,30 @@
   <para>
    In some cases, queries with visibly different texts might get merged into a
    single <structname>pg_stat_statements</structname> entry.  Normally this will happen
-   only for semantically equivalent queries, but there is a small chance of
-   hash collisions causing unrelated queries to be merged into one entry.
-   (This cannot happen for queries belonging to different users or databases,
-   however.)
+   only for semantically equivalent queries, for example when queries are
+   different only in values of constants they use. Another valid possibility for
+   merging queries into a single <structname>pg_stat_statements</structname>
+   entry is when <xref linkend="guc-const-merge-threshold"/> is nonzero and the
+   queries contain an array with more than <varname>const_merge_threshold</varname>
+   constants in it:
+
+<screen>
+=# SET const_merge_threshold = 5;
+=# SELECT pg_stat_statements_reset();
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8, 9);
+=# SELECT * FROM test WHERE a IN (1, 2, 3, 4, 5, 6, 7);
+=# SELECT query, calls FROM pg_stat_statements;
+-[ RECORD 1 ]------------------------------
+query | SELECT * FROM test WHERE a IN (...)
+calls | 2
+-[ RECORD 2 ]------------------------------
+query | SELECT pg_stat_statements_reset()
+calls | 1
+</screen>
+
+   But there is a small chance of hash collisions causing unrelated queries to
+   be merged into one entry. (This cannot happen for queries belonging to
+   different users or databases, however.)
   </para>
 
   <para>
diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index 19ed29657c..86c4223ace 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -1306,7 +1306,7 @@ _jumble${n}(JumbleState *jstate, Node *node)
 			# Track the node's location only if directly requested.
 			if ($query_jumble_location)
 			{
-				print $jff "\tJUMBLE_LOCATION($f);\n"
+				print $jff "\tJUMBLE_LOCATION($f, false);\n"
 				  unless $query_jumble_ignore;
 			}
 		}
diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c
index 223d1bc826..06551f6a1b 100644
--- a/src/backend/nodes/queryjumblefuncs.c
+++ b/src/backend/nodes/queryjumblefuncs.c
@@ -42,12 +42,15 @@
 /* GUC parameters */
 int			compute_query_id = COMPUTE_QUERY_ID_AUTO;
 
+/* Minimal numer of constants in an array after which they will be merged */
+int			const_merge_threshold = 0;
+
 /* True when compute_query_id is ON, or AUTO and a module requests them */
 bool		query_id_enabled = false;
 
 static void AppendJumble(JumbleState *jstate,
 						 const unsigned char *item, Size size);
-static void RecordConstLocation(JumbleState *jstate, int location);
+static void RecordConstLocation(JumbleState *jstate, int location, bool merged);
 static void _jumbleNode(JumbleState *jstate, Node *node);
 static void _jumbleList(JumbleState *jstate, Node *node);
 static void _jumbleRangeTblEntry(JumbleState *jstate, Node *node);
@@ -109,7 +112,7 @@ JumbleQuery(Query *query, const char *querytext)
 	jstate->jumble_len = 0;
 	jstate->clocations_buf_size = 32;
 	jstate->clocations = (LocationLen *)
-		palloc(jstate->clocations_buf_size * sizeof(LocationLen));
+		palloc0(jstate->clocations_buf_size * sizeof(LocationLen));
 	jstate->clocations_count = 0;
 	jstate->highest_extern_param_id = 0;
 
@@ -185,11 +188,13 @@ AppendJumble(JumbleState *jstate, const unsigned char *item, Size size)
 }
 
 /*
- * Record location of constant within query string of query tree
- * that is currently being walked.
+ * Record location of constant within query string of query tree that is
+ * currently being walked. Merged argument signals that the constant do not
+ * contribute to the jumble hash, and any reader of constants array may want to
+ * use this information to represent such constants differently.
  */
 static void
-RecordConstLocation(JumbleState *jstate, int location)
+RecordConstLocation(JumbleState *jstate, int location, bool merged)
 {
 	/* -1 indicates unknown or undefined location */
 	if (location >= 0)
@@ -205,15 +210,81 @@ RecordConstLocation(JumbleState *jstate, int location)
 		}
 		jstate->clocations[jstate->clocations_count].location = location;
 		/* initialize lengths to -1 to simplify third-party module usage */
+		jstate->clocations[jstate->clocations_count].merged = merged;
 		jstate->clocations[jstate->clocations_count].length = -1;
 		jstate->clocations_count++;
 	}
 }
 
+/*
+ * Verify if the provided list contains only constant expressions and its
+ * length is greater than or equal to const_merge_threshold, thus making it
+ * eligible for constants merging.
+ *
+ * Note that this function searches only for Const directly and do not tries to
+ * simplify expressions.
+ */
+static bool
+IsConstList(List *elements, Const **firstConst, Const **lastConst)
+{
+	ListCell   *temp;
+	Node	   *firstExpr = NULL;
+	bool		allConst = true;
+
+	if (elements == NULL)
+		return false;
+
+	if (const_merge_threshold == 0)
+	{
+		/* Merging is disabled, process everything one by one. */
+		return false;
+	}
+
+	if (elements->length < const_merge_threshold)
+	{
+		/* The list is not large enough to collapse it. */
+		return false;
+	}
+
+	firstExpr = linitial(elements);
+
+	/*
+	 * If the first expression is a constant, verify if the following elements
+	 * are constants as well. If yes, the list is eligible for collapsing --
+	 * mark it as merged and return from the function.
+	 */
+	if (IsA(firstExpr, Const))
+	{
+		foreach(temp, elements)
+		{
+			Node 	*expr = (Node *) lfirst(temp);
+
+			if (!IsA(expr, Const))
+			{
+				allConst = false;
+				break;
+			}
+		}
+
+		if (allConst)
+		{
+			*firstConst = (Const *) firstExpr;
+			*lastConst = llast_node(Const, elements);
+			return true;
+		}
+	}
+
+	/*
+	 * If we end up here, it means no constants merging is possible, process
+	 * the list as usual.
+	 */
+	return false;
+}
+
 #define JUMBLE_NODE(item) \
 	_jumbleNode(jstate, (Node *) expr->item)
-#define JUMBLE_LOCATION(location) \
-	RecordConstLocation(jstate, expr->location)
+#define JUMBLE_LOCATION(location, merged) \
+	RecordConstLocation(jstate, expr->location, merged)
 #define JUMBLE_FIELD(item) \
 	AppendJumble(jstate, (const unsigned char *) &(expr->item), sizeof(expr->item))
 #define JUMBLE_FIELD_SINGLE(item) \
@@ -226,6 +297,23 @@ do { \
 
 #include "queryjumblefuncs.funcs.c"
 
+static void
+_jumbleArrayExpr(JumbleState *jstate, Node *node)
+{
+	ArrayExpr *expr = (ArrayExpr *) node;
+	Const *first, *last;
+
+	if (IsConstList(expr->elements, &first, &last))
+	{
+		RecordConstLocation(jstate, first->location, true);
+		RecordConstLocation(jstate, last->location, true);
+	}
+	else
+	{
+		JUMBLE_NODE(elements);
+	}
+}
+
 static void
 _jumbleNode(JumbleState *jstate, Node *node)
 {
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index b46e3b8c55..77ab38172b 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -3467,6 +3467,19 @@ struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"const_merge_threshold", PGC_SUSET, STATS_MONITORING,
+			gettext_noop("Sets the minimal numer of constants in an array"
+						 " after which they will be merged"),
+			gettext_noop("Computing query id for an array of constants"
+						 " will produce the same id for all arrays with length"
+						 " larger than this value. Zero turns off merging."),
+		},
+		&const_merge_threshold,
+		0, 0, INT_MAX,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index d06074b86f..0594eb17b2 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -627,7 +627,7 @@
 #log_parser_stats = off
 #log_planner_stats = off
 #log_executor_stats = off
-
+#const_merge_threshold = 0
 
 #------------------------------------------------------------------------------
 # AUTOVACUUM
diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h
index 6d740be5c0..8e568947bd 100644
--- a/src/include/nodes/primnodes.h
+++ b/src/include/nodes/primnodes.h
@@ -1287,6 +1287,8 @@ typedef struct CaseTestExpr
  */
 typedef struct ArrayExpr
 {
+	pg_node_attr(custom_query_jumble)
+
 	Expr		xpr;
 	/* type of expression result */
 	Oid			array_typeid pg_node_attr(query_jumble_ignore);
diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h
index 204b8f74fd..4410e2cf61 100644
--- a/src/include/nodes/queryjumble.h
+++ b/src/include/nodes/queryjumble.h
@@ -15,6 +15,7 @@
 #define QUERYJUBLE_H
 
 #include "nodes/parsenodes.h"
+#include "nodes/nodeFuncs.h"
 
 /*
  * Struct for tracking locations/lengths of constants during normalization
@@ -23,6 +24,8 @@ typedef struct LocationLen
 {
 	int			location;		/* start offset in query text */
 	int			length;			/* length in bytes, or -1 to ignore */
+	bool		merged;			/* whether or not the location was marked as
+								   not contributing to jumble */
 } LocationLen;
 
 /*
@@ -61,7 +64,7 @@ enum ComputeQueryIdType
 
 /* GUC parameters */
 extern PGDLLIMPORT int compute_query_id;
-
+extern PGDLLIMPORT int const_merge_threshold;
 
 extern const char *CleanQuerytext(const char *query, int *location, int *len);
 extern JumbleState *JumbleQuery(Query *query, const char *querytext);
-- 
2.32.0

Reply via email to