> On Thu, Mar 10, 2022 at 12:11:59PM -0500, Tom Lane wrote: > Dmitry Dolgov <9erthali...@gmail.com> writes: > > New status: Waiting on Author > > > This seems incorrect, as the only feedback I've got was "this is a bad > > idea", and no reaction on follow-up questions. > > I changed the status because it seems to me there is no chance of > this being committed as-is. > > 1. I think an absolute prerequisite before we could even consider > changing the query jumbler rules this much is to do the work that was > put off when the jumbler was moved into core: that is, provide some > honest support for multiple query-ID generation methods being used at > the same time. Even if you successfully make a case for > pg_stat_statements to act this way, other consumers of query IDs > aren't going to be happy with it. > > 2. You haven't made a case for it. The original complaint was > about different lengths of IN lists not being treated as equivalent, > but this patch has decided to do I'm-not-even-sure-quite-what > about treating different Params as equivalent. Plus you're trying > to invoke eval_const_expressions in the jumbler; that is absolutely > Not OK, for both safety and semantic reasons. > > If you backed off to just treating ArrayExprs containing different > numbers of Consts as equivalent, maybe that'd be something we could > adopt without fixing point 1. I don't think anything that fuzzes the > treatment of Params can get away with that, though.
Here is the limited version of list collapsing functionality, which doesn't utilize eval_const_expressions and ignores most of the stuff except ArrayExprs. Any thoughts/more suggestions?
>From ce9f2ed2466d28dbbef3310383d84eba58e5791b Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthali...@gmail.com> Date: Sat, 12 Mar 2022 14:42:02 +0100 Subject: [PATCH v6] Prevent jumbling of every element in ArrayExpr pg_stat_statements produces multiple entries for queries like SELECT something FROM table WHERE col IN (1, 2, 3, ...) depending on number of parameters, because every element of ArrayExpr is jumbled. Make Consts contribute nothing to the jumble hash if they're part of a series and at position further that specified threshold. Reviewed-by: Zhihong Yu, Sergey Dudoladov Tested-by: Chengxi Sun --- .../expected/pg_stat_statements.out | 412 ++++++++++++++++++ .../pg_stat_statements/pg_stat_statements.c | 26 +- .../sql/pg_stat_statements.sql | 107 +++++ src/backend/utils/misc/guc.c | 13 + src/backend/utils/misc/queryjumble.c | 236 +++++++++- src/include/utils/queryjumble.h | 10 +- 6 files changed, 791 insertions(+), 13 deletions(-) diff --git a/contrib/pg_stat_statements/expected/pg_stat_statements.out b/contrib/pg_stat_statements/expected/pg_stat_statements.out index e0abe34bb6..e05a6f565a 100644 --- a/contrib/pg_stat_statements/expected/pg_stat_statements.out +++ b/contrib/pg_stat_statements/expected/pg_stat_statements.out @@ -1077,4 +1077,416 @@ SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%'; 2 (1 row) +-- +-- Consts merging +-- +CREATE TABLE test_merge (id int, data int); +-- IN queries +-- No merging +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +--------------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(7 rows) + +-- Normal +SET const_merge_threshold = 5; +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +-- On the merge threshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 6 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); + id | data +----+------ +(0 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 5 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +-- With gaps on the threshold +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4) | 1 + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(4 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) | 2 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 1 +(3 rows) + +-- test constants after merge +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2; + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +---------------------------------------------------------------------------+------- + SELECT * FROM test_merge WHERE id IN ($1, $2, $3, $4, ...) and data = $11 | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +-- On table, numeric type causes every constant being wrapped into functions. +CREATE TABLE test_merge_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset(); + pg_stat_statements_reset +-------------------------- + +(1 row) + +SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + id | data +----+------ +(0 rows) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +------------------------------------------------------------------------+------- + SELECT * FROM test_merge_numeric WHERE id IN ($1, $2, $3, $4, ...) | 1 + SELECT pg_stat_statements_reset() | 1 + SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0 +(3 rows) + +-- Test find_const_walker +WITH cte AS ( + SELECT 'const' as const FROM test_merge +) +SELECT ARRAY['a', 'b', 'c', const::varchar] AS result +FROM cte; + result +-------- +(0 rows) + +RESET const_merge_threshold; DROP EXTENSION pg_stat_statements; diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 082bfa8f77..b872490133 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2610,6 +2610,7 @@ generate_normalized_query(JumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ + bool merge = false; /* * Get constants' lengths (core system only gives us locations). Note @@ -2648,12 +2649,27 @@ generate_normalized_query(JumbleState *jstate, const char *query, len_to_wrt -= last_tok_len; Assert(len_to_wrt >= 0); - memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); - n_quer_loc += len_to_wrt; - /* And insert a param symbol in place of the constant token */ - n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id); + if (!jstate->clocations[i].merged) + { + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* And insert a param symbol in place of the constant token */ + n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", + i + 1 + jstate->highest_extern_param_id); + if (merge) + merge = false; + } + else if (!merge) + { + memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); + n_quer_loc += len_to_wrt; + + /* Merge until a non merged constant appear */ + merge = true; + n_quer_loc += sprintf(norm_query + n_quer_loc, "..."); + } quer_loc = off + tok_len; last_off = off; diff --git a/contrib/pg_stat_statements/sql/pg_stat_statements.sql b/contrib/pg_stat_statements/sql/pg_stat_statements.sql index dffd2c8c18..55ba3b35e7 100644 --- a/contrib/pg_stat_statements/sql/pg_stat_statements.sql +++ b/contrib/pg_stat_statements/sql/pg_stat_statements.sql @@ -442,4 +442,111 @@ SELECT ( SELECT COUNT(*) FROM pg_stat_statements WHERE query LIKE '%SELECT GROUPING%'; +-- +-- Consts merging +-- +CREATE TABLE test_merge (id int, data int); + +-- IN queries + +-- No merging +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Normal +SET const_merge_threshold = 5; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- On the merge threshold +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- With gaps on the threshold +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- test constants after merge +SELECT pg_stat_statements_reset(); + +SELECT * FROM test_merge WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10) and data = 2; +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- On table, numeric type causes every constant being wrapped into functions. +CREATE TABLE test_merge_numeric (id int, data numeric(5, 2)); +SELECT pg_stat_statements_reset(); +SELECT * FROM test_merge_numeric WHERE id IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + +-- Test find_const_walker +WITH cte AS ( + SELECT 'const' as const FROM test_merge +) +SELECT ARRAY['a', 'b', 'c', const::varchar] AS result +FROM cte; + +RESET const_merge_threshold; + DROP EXTENSION pg_stat_statements; diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index f505413a7f..45a6d593ce 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -3600,6 +3600,19 @@ static struct config_int ConfigureNamesInt[] = NULL, NULL, NULL }, + { + {"const_merge_threshold", PGC_SUSET, STATS_MONITORING, + gettext_noop("Sets the minimal numer of constants in an array" + " after which they will be merged"), + gettext_noop("Computing query id for an array of constants" + " will produce the same id for all arrays with length" + " larger than this value. Zero turns off merging."), + }, + &const_merge_threshold, + 0, 0, INT_MAX, + NULL, NULL, NULL + }, + /* End-of-list marker */ { {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL diff --git a/src/backend/utils/misc/queryjumble.c b/src/backend/utils/misc/queryjumble.c index a67487e5fe..563de94f9f 100644 --- a/src/backend/utils/misc/queryjumble.c +++ b/src/backend/utils/misc/queryjumble.c @@ -42,6 +42,9 @@ /* GUC parameters */ int compute_query_id = COMPUTE_QUERY_ID_AUTO; +/* Minimal numer of constants in an array after which they will be merged */ +int const_merge_threshold = 0; + /* True when compute_query_id is ON, or AUTO and a module requests them */ bool query_id_enabled = false; @@ -52,7 +55,8 @@ static void JumbleQueryInternal(JumbleState *jstate, Query *query); static void JumbleRangeTable(JumbleState *jstate, List *rtable); static void JumbleRowMarks(JumbleState *jstate, List *rowMarks); static void JumbleExpr(JumbleState *jstate, Node *node); -static void RecordConstLocation(JumbleState *jstate, int location); +static bool JumbleExprList(JumbleState *jstate, Node *node); +static void RecordConstLocation(JumbleState *jstate, int location, bool merged); /* * Given a possibly multi-statement source string, confine our attention to the @@ -119,7 +123,7 @@ JumbleQuery(Query *query, const char *querytext) jstate->jumble_len = 0; jstate->clocations_buf_size = 32; jstate->clocations = (LocationLen *) - palloc(jstate->clocations_buf_size * sizeof(LocationLen)); + palloc0(jstate->clocations_buf_size * sizeof(LocationLen)); jstate->clocations_count = 0; jstate->highest_extern_param_id = 0; @@ -341,6 +345,225 @@ JumbleRowMarks(JumbleState *jstate, List *rowMarks) } } +/* + * find_const_walker + * Locate all the Const nodes in an expression tree. + * + * Caller must provide an empty list where constants will be collected. + */ +static bool +find_const_walker(Node *node, List **constants) +{ + if (node == NULL) + return false; + + if (IsA(node, Const)) + { + *constants = lappend(*constants, (Const *) node); + return false; + } + + return expression_tree_walker(node, find_const_walker, (void *) constants); +} + +static bool +JumbleExprList(JumbleState *jstate, Node *node) +{ + ListCell *temp; + Node *firstExpr = NULL; + bool merged = false; + bool allConst = true; + int currentExprIdx; + int lastExprLength = 0; + + if (node == NULL) + return merged; + + if (const_merge_threshold == 0) + { + /* Merging is disabled, process everything one by one. */ + JumbleExpr(jstate, node); + return merged; + } + + /* Guard against stack overflow due to overly complex expressions */ + check_stack_depth(); + + Assert(IsA(node, List)); + firstExpr = (Node *) lfirst(list_head((List *) node)); + + /* + * We always emit the node's NodeTag, then any additional fields that are + * considered significant, and then we recurse to any child nodes. + */ + APP_JUMB(node->type); + + /* + * If the first expression is a constant or a list of constants, try to + * merge the following if they're constants as well. Otherwise do + * JumbleExpr as usual. + */ + switch (nodeTag(firstExpr)) + { + case T_List: + currentExprIdx = 0; + + foreach(temp, (List *) node) + { + List *expr = (List *) lfirst(temp); + ListCell *lc; + + foreach(lc, expr) + { + Node * subExpr = (Node *) lfirst(lc); + + if (!IsA(subExpr, Const)) + { + allConst = false; + break; + } + } + + if (allConst && currentExprIdx >= const_merge_threshold - 1) + { + merged = true; + + /* + * This hash is going to accumulate the following merged + * statements + */ + if (currentExprIdx == const_merge_threshold - 1) + { + JumbleExpr(jstate, (Node *) expr); + + /* + * An expr consisting of constants is already found, + * JumbleExpr must record it. Mark all the constants as + * merged, they will be the first merged but still + * present in the statement query. + */ + Assert(jstate->clocations_count > lastExprLength - 1); + for (int i = 1; i < lastExprLength + 1; i++) + { + LocationLen *loc; + loc = &jstate->clocations[jstate->clocations_count - i]; + loc->merged = true; + } + currentExprIdx++; + } + else + foreach(lc, expr) + { + /* + * Find the last constant to provide information + * for generate_normalized_query where the current + * expression actually ends. + */ + Const *lastConst; + List *constants = NIL; + find_const_walker((Node *) lfirst(lc), &constants); + + /* We should be able to find some constants, as + * they were discovered before. */ + Assert(constants != NIL); + lastConst = (Const *) llast(constants); + RecordConstLocation(jstate, lastConst->location, true); + } + + continue; + } + + JumbleExpr(jstate, (Node *) expr); + currentExprIdx++; + lastExprLength = expr->length; + } + break; + + case T_Const: + currentExprIdx = 0; + + foreach(temp, (List *) node) + { + Node *expr = (Node *) lfirst(temp); + + if (IsA(expr, Const) && currentExprIdx >= const_merge_threshold - 1) + { + merged = true; + + /* + * This hash is going to accumulate the following merged + * statements + */ + if (currentExprIdx == const_merge_threshold - 1) + { + JumbleExpr(jstate, expr); + + /* + * A const expr is already found, so JumbleExpr must + * record it. Mark it as merged, it will be the first + * merged but still present in the statement query. + */ + Assert(jstate->clocations_count > lastExprLength - 1); + for (int i = 1; i < lastExprLength + 1; i++) + { + LocationLen *loc; + loc = &jstate->clocations[jstate->clocations_count - i]; + loc->merged = true; + } + currentExprIdx++; + } + else + { + /* + * Find the last constant to provide information + * for generate_normalized_query where the current + * expression actually ends. + */ + Const *lastConst; + List *constants = NIL; + find_const_walker(expr, &constants); + + /* We should be able to find some constants, as + * they were discovered before. */ + Assert(constants != NIL); + lastConst = (Const *) llast(constants); + RecordConstLocation(jstate, lastConst->location, true); + } + + continue; + } + + JumbleExpr(jstate, expr); + currentExprIdx++; + + if (currentExprIdx == const_merge_threshold -1) + { + /* + * The next expression will be eligible for merging check. + * For it to happen correctly remember the number of + * constants in the previous expression. + */ + List *constants = NIL; + find_const_walker(expr, &constants); + + /* The expression we work with here could be anything, so + * no constants found is a possible outcome. */ + if (constants != NIL) + lastExprLength = constants->length; + else + lastExprLength = 1; + } + } + break; + + default: + JumbleExpr(jstate, node); + break; + } + + return merged; +} + /* * Jumble an expression tree * @@ -390,7 +613,7 @@ JumbleExpr(JumbleState *jstate, Node *node) /* We jumble only the constant's type, not its value */ APP_JUMB(c->consttype); /* Also, record its parse location for query normalization */ - RecordConstLocation(jstate, c->location); + RecordConstLocation(jstate, c->location, false); } break; case T_Param: @@ -579,7 +802,7 @@ JumbleExpr(JumbleState *jstate, Node *node) } break; case T_ArrayExpr: - JumbleExpr(jstate, (Node *) ((ArrayExpr *) node)->elements); + JumbleExprList(jstate, (Node *) ((ArrayExpr *) node)->elements); break; case T_RowExpr: JumbleExpr(jstate, (Node *) ((RowExpr *) node)->args); @@ -832,11 +1055,11 @@ JumbleExpr(JumbleState *jstate, Node *node) } /* - * Record location of constant within query string of query tree + * Record location of constant or a parameter within query string of query tree * that is currently being walked. */ static void -RecordConstLocation(JumbleState *jstate, int location) +RecordConstLocation(JumbleState *jstate, int location, bool merged) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -851,6 +1074,7 @@ RecordConstLocation(JumbleState *jstate, int location) sizeof(LocationLen)); } jstate->clocations[jstate->clocations_count].location = location; + jstate->clocations[jstate->clocations_count].merged = merged; /* initialize lengths to -1 to simplify third-party module usage */ jstate->clocations[jstate->clocations_count].length = -1; jstate->clocations_count++; diff --git a/src/include/utils/queryjumble.h b/src/include/utils/queryjumble.h index a4c277269e..157e3f777a 100644 --- a/src/include/utils/queryjumble.h +++ b/src/include/utils/queryjumble.h @@ -15,6 +15,7 @@ #define QUERYJUBLE_H #include "nodes/parsenodes.h" +#include "nodes/nodeFuncs.h" #define JUMBLE_SIZE 1024 /* query serialization buffer size */ @@ -25,6 +26,8 @@ typedef struct LocationLen { int location; /* start offset in query text */ int length; /* length in bytes, or -1 to ignore */ + bool merged; /* whether or not the location was marked as + duplicate */ } LocationLen; /* @@ -39,7 +42,10 @@ typedef struct JumbleState /* Number of bytes used in jumble[] */ Size jumble_len; - /* Array of locations of constants that should be removed */ + /* + * Array of locations of constants that should be removed, or parameters + * that are already replaced, but could be also processed to be merged + */ LocationLen *clocations; /* Allocated length of clocations array */ @@ -62,7 +68,7 @@ enum ComputeQueryIdType /* GUC parameters */ extern int compute_query_id; - +extern int const_merge_threshold; extern const char *CleanQuerytext(const char *query, int *location, int *len); extern JumbleState *JumbleQuery(Query *query, const char *querytext); -- 2.32.0