Hello, I have pushed that now, and here's a rebase of patch 0003 to add support for PARAM_EXTERN. I'm not really sure about this one yet ...
-- Álvaro Herrera PostgreSQL Developer — https://www.EnterpriseDB.com/ "La victoria es para quien se atreve a estar solo"
>From 0a836189a2e3af3beeb7e3c55d7d0e4ce99b4e8e Mon Sep 17 00:00:00 2001 From: Sami Imseih <sims...@amazon.com> Date: Fri, 30 May 2025 11:42:56 -0500 Subject: [PATCH v10] Support Squashing of External Parameters 62d712ec introduced the concept of element squashing for quwry normalization purposes. However, it did not account for external parameters passed to a list of elements. This adds support to these types of values and simplifies the squashing logic further. --- .../pg_stat_statements/expected/extended.out | 36 ++++++--- .../pg_stat_statements/expected/squashing.out | 26 +++--- .../pg_stat_statements/pg_stat_statements.c | 4 + contrib/pg_stat_statements/sql/extended.sql | 5 +- contrib/pg_stat_statements/sql/squashing.sql | 4 +- src/backend/nodes/queryjumblefuncs.c | 80 ++++++++++++------- src/include/nodes/primnodes.h | 6 +- src/include/nodes/queryjumble.h | 16 +++- 8 files changed, 118 insertions(+), 59 deletions(-) diff --git a/contrib/pg_stat_statements/expected/extended.out b/contrib/pg_stat_statements/expected/extended.out index 7da308ba84f..6f2c231bf2a 100644 --- a/contrib/pg_stat_statements/expected/extended.out +++ b/contrib/pg_stat_statements/expected/extended.out @@ -69,13 +69,13 @@ SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"; (4 rows) -- Various parameter numbering patterns +-- Unique query IDs with parameter numbers switched. SELECT pg_stat_statements_reset() IS NOT NULL AS t; t --- t (1 row) --- Unique query IDs with parameter numbers switched. SELECT WHERE ($1::int, 7) IN ((8, $2::int), ($3::int, 9)) \bind '1' '2' '3' \g -- (0 rows) @@ -96,7 +96,24 @@ SELECT WHERE $3::int IN ($1::int, $2::int) \bind '1' '2' '3' \g -- (0 rows) +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +--------------------------------------------------------------+------- + SELECT WHERE $1::int IN ($2 /*, ... */) | 1 + SELECT WHERE $1::int IN ($2 /*, ... */) | 1 + SELECT WHERE $1::int IN ($2 /*, ... */) | 1 + SELECT WHERE ($1::int, $4) IN (($5, $2::int), ($3::int, $6)) | 1 + SELECT WHERE ($2::int, $4) IN (($5, $3::int), ($1::int, $6)) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(6 rows) + -- Two groups of two queries with the same query ID. +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + SELECT WHERE '1'::int IN ($1::int, '2'::int) \bind '1' \g -- (1 row) @@ -114,15 +131,10 @@ SELECT WHERE $2::int IN ($1::int, '2'::int) \bind '3' '4' \g (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls ---------------------------------------------------------------+------- - SELECT WHERE $1::int IN ($2::int, $3::int) | 1 - SELECT WHERE $2::int IN ($1::int, $3::int) | 2 - SELECT WHERE $2::int IN ($1::int, $3::int) | 2 - SELECT WHERE $2::int IN ($3::int, $1::int) | 1 - SELECT WHERE $3::int IN ($1::int, $2::int) | 1 - SELECT WHERE ($1::int, $4) IN (($5, $2::int), ($3::int, $6)) | 1 - SELECT WHERE ($2::int, $4) IN (($5, $3::int), ($1::int, $6)) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 -(8 rows) + query | calls +----------------------------------------------------+------- + SELECT WHERE $1::int IN ($2 /*, ... */) | 2 + SELECT WHERE $1::int IN ($2 /*, ... */) | 2 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(3 rows) diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out index 7b935d464ec..e5dd9337165 100644 --- a/contrib/pg_stat_statements/expected/squashing.out +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -103,7 +103,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) --- external parameters will not be squashed +-- external parameters will be squashed SELECT pg_stat_statements_reset() IS NOT NULL AS t; t --- @@ -123,14 +123,14 @@ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) \bind (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls ----------------------------------------------------------------------------+------- - SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) | 1 - SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +----------------------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 1 + SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1 /*, ... */]) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (3 rows) --- neither are prepared statements +-- prepared statements will also be squashed -- the IN and ARRAY forms of this statement will have the same queryId SELECT pg_stat_statements_reset() IS NOT NULL AS t; t @@ -155,12 +155,12 @@ EXECUTE p1(1, 2, 3, 4, 5); DEALLOCATE p1; SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls -------------------------------------------------------------+------- - DEALLOCATE $1 | 2 - PREPARE p1(int, int, int, int, int) AS +| 2 - SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) | - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +-------------------------------------------------------+------- + DEALLOCATE $1 | 2 + PREPARE p1(int, int, int, int, int) AS +| 2 + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (3 rows) -- More conditions in the query diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index ecc7f2fb266..97b62b635bc 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2841,6 +2841,10 @@ generate_normalized_query(JumbleState *jstate, const char *query, int off, /* Offset from start for cur tok */ tok_len; /* Length (in bytes) of that tok */ + if (jstate->clocations[i].extern_param && + !jstate->has_squashed_lists) + continue; + off = jstate->clocations[i].location; /* Adjust recorded location if we're dealing with partial string */ diff --git a/contrib/pg_stat_statements/sql/extended.sql b/contrib/pg_stat_statements/sql/extended.sql index a366658a53a..ffb5b162819 100644 --- a/contrib/pg_stat_statements/sql/extended.sql +++ b/contrib/pg_stat_statements/sql/extended.sql @@ -21,17 +21,18 @@ SELECT $1 \bind 'unnamed_val1' \g SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C"; -- Various parameter numbering patterns -SELECT pg_stat_statements_reset() IS NOT NULL AS t; -- Unique query IDs with parameter numbers switched. +SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT WHERE ($1::int, 7) IN ((8, $2::int), ($3::int, 9)) \bind '1' '2' '3' \g SELECT WHERE ($2::int, 10) IN ((11, $3::int), ($1::int, 12)) \bind '1' '2' '3' \g SELECT WHERE $1::int IN ($2::int, $3::int) \bind '1' '2' '3' \g SELECT WHERE $2::int IN ($3::int, $1::int) \bind '1' '2' '3' \g SELECT WHERE $3::int IN ($1::int, $2::int) \bind '1' '2' '3' \g +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; -- Two groups of two queries with the same query ID. +SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT WHERE '1'::int IN ($1::int, '2'::int) \bind '1' \g SELECT WHERE '4'::int IN ($1::int, '5'::int) \bind '2' \g SELECT WHERE $2::int IN ($1::int, '1'::int) \bind '1' '2' \g SELECT WHERE $2::int IN ($1::int, '2'::int) \bind '3' '4' \g - SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql index bd3243ec9cd..1e36708778a 100644 --- a/contrib/pg_stat_statements/sql/squashing.sql +++ b/contrib/pg_stat_statements/sql/squashing.sql @@ -32,7 +32,7 @@ SELECT WHERE 1 IN (1, int4(1), int4(2), 2); SELECT WHERE 1 = ANY (ARRAY[1, int4(1), int4(2), 2]); SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; --- external parameters will not be squashed +-- external parameters will be squashed SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT * FROM test_squash WHERE id IN ($1, $2, $3, $4, $5) \bind 1 2 3 4 5 ; @@ -40,7 +40,7 @@ SELECT * FROM test_squash WHERE id::text = ANY(ARRAY[$1, $2, $3, $4, $5]) \bind ; SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; --- neither are prepared statements +-- prepared statements will also be squashed -- the IN and ARRAY forms of this statement will have the same queryId SELECT pg_stat_statements_reset() IS NOT NULL AS t; PREPARE p1(int, int, int, int, int) AS diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index fb33e6931ad..0f81a08704d 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -61,6 +61,7 @@ static void AppendJumble(JumbleState *jstate, const unsigned char *value, Size size); static void FlushPendingNulls(JumbleState *jstate); static void RecordConstLocation(JumbleState *jstate, + bool extern_param, int location, int len); static void _jumbleNode(JumbleState *jstate, Node *node); static void _jumbleElements(JumbleState *jstate, List *elements, Node *node); @@ -70,6 +71,7 @@ static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); static void _jumbleRangeTblEntry_eref(JumbleState *jstate, RangeTblEntry *rte, Alias *expr); +static void _jumbleParam(JumbleState *jstate, Node *node); /* * Given a possibly multi-statement source string, confine our attention to the @@ -185,6 +187,7 @@ InitJumble(void) jstate->clocations_count = 0; jstate->highest_extern_param_id = 0; jstate->pending_nulls = 0; + jstate->has_squashed_lists = false; #ifdef USE_ASSERT_CHECKING jstate->total_jumble_len = 0; #endif @@ -207,6 +210,10 @@ DoJumble(JumbleState *jstate, Node *node) if (jstate->pending_nulls > 0) FlushPendingNulls(jstate); + /* Squashed list found, reset highest_extern_param_id */ + if (jstate->has_squashed_lists) + jstate->highest_extern_param_id = 0; + /* Process the jumble buffer and produce the hash value */ return DatumGetInt64(hash_any_extended(jstate->jumble, jstate->jumble_len, @@ -376,14 +383,14 @@ FlushPendingNulls(JumbleState *jstate) * Record the location of some kind of constant within a query string. * These are not only bare constants but also expressions that ultimately * constitute a constant, such as those inside casts and simple function - * calls. + * calls; if extern_param, then it corresponds to a PARAM_EXTERN Param. * * If length is -1, it indicates a single such constant element. If * it's a positive integer, it indicates the length of a squashable * list of them. */ static void -RecordConstLocation(JumbleState *jstate, int location, int len) +RecordConstLocation(JumbleState *jstate, bool extern_param, int location, int len) { /* -1 indicates unknown or undefined location */ if (location >= 0) @@ -406,6 +413,7 @@ RecordConstLocation(JumbleState *jstate, int location, int len) Assert(len > -1 || len == -1); jstate->clocations[jstate->clocations_count].length = len; jstate->clocations[jstate->clocations_count].squashed = (len > -1); + jstate->clocations[jstate->clocations_count].extern_param = extern_param; jstate->clocations_count++; } } @@ -422,6 +430,7 @@ RecordConstLocation(JumbleState *jstate, int location, int len) static bool IsSquashableConstant(Node *element) { + /* Unwrap RelabelType and CoerceViaIO layers */ if (IsA(element, RelabelType)) element = (Node *) ((RelabelType *) element)->arg; @@ -430,6 +439,12 @@ IsSquashableConstant(Node *element) switch (nodeTag(element)) { + case T_Param: + return castNode(Param, element)->paramkind == PARAM_EXTERN; + + case T_Const: + return true; + case T_FuncExpr: { FuncExpr *func = (FuncExpr *) element; @@ -468,11 +483,8 @@ IsSquashableConstant(Node *element) } default: - if (!IsA(element, Const)) - return false; + return false; } - - return true; } /* @@ -508,7 +520,7 @@ IsSquashableConstantList(List *elements) #define JUMBLE_ELEMENTS(list, node) \ _jumbleElements(jstate, (List *) expr->list, node) #define JUMBLE_LOCATION(location) \ - RecordConstLocation(jstate, expr->location, -1) + RecordConstLocation(jstate, false, expr->location, -1) #define JUMBLE_FIELD(item) \ do { \ if (sizeof(expr->item) == 8) \ @@ -558,9 +570,11 @@ _jumbleElements(JumbleState *jstate, List *elements, Node *node) if (aexpr->list_start > 0 && aexpr->list_end > 0) { RecordConstLocation(jstate, + false, aexpr->list_start + 1, (aexpr->list_end - aexpr->list_start) - 1); normalize_list = true; + jstate->has_squashed_lists = true; } } } @@ -612,26 +626,6 @@ _jumbleNode(JumbleState *jstate, Node *node) break; } - /* Special cases to handle outside the automated code */ - switch (nodeTag(expr)) - { - case T_Param: - { - Param *p = (Param *) node; - - /* - * Update the highest Param id seen, in order to start - * normalization correctly. - */ - if (p->paramkind == PARAM_EXTERN && - p->paramid > jstate->highest_extern_param_id) - jstate->highest_extern_param_id = p->paramid; - } - break; - default: - break; - } - /* Ensure we added something to the jumble buffer */ Assert(jstate->total_jumble_len > prev_jumble_len); } @@ -734,3 +728,35 @@ _jumbleRangeTblEntry_eref(JumbleState *jstate, */ JUMBLE_STRING(aliasname); } + +/* + * Custom query jumble function for _jumbleParam. + */ +static void +_jumbleParam(JumbleState *jstate, Node *node) +{ + Param *expr = (Param *) node; + + JUMBLE_FIELD(paramkind); + JUMBLE_FIELD(paramid); + JUMBLE_FIELD(paramtype); + + if (expr->paramkind == PARAM_EXTERN) + { + /* + * At this point, only external parameter locations outside of + * squashable lists will be recorded. + */ + RecordConstLocation(jstate, true, expr->location, -1); + + /* + * Update the highest Param id seen, in order to start normalization + * correctly. + * + * Note: This value is reset at the end of jumbling if there exists a + * squashable list. See the comment in the definition of JumbleState. + */ + if (expr->paramid > jstate->highest_extern_param_id) + jstate->highest_extern_param_id = expr->paramid; + } +} diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 01510b01b64..6dfca3cb35b 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -389,14 +389,16 @@ typedef enum ParamKind typedef struct Param { + pg_node_attr(custom_query_jumble) + Expr xpr; ParamKind paramkind; /* kind of parameter. See above */ int paramid; /* numeric ID for parameter */ Oid paramtype; /* pg_type OID of parameter's datatype */ /* typmod value, if known */ - int32 paramtypmod pg_node_attr(query_jumble_ignore); + int32 paramtypmod; /* OID of collation, or InvalidOid if none */ - Oid paramcollid pg_node_attr(query_jumble_ignore); + Oid paramcollid; /* token location, or -1 if unknown */ ParseLoc location; } Param; diff --git a/src/include/nodes/queryjumble.h b/src/include/nodes/queryjumble.h index da7c7abed2e..bab971162dc 100644 --- a/src/include/nodes/queryjumble.h +++ b/src/include/nodes/queryjumble.h @@ -29,6 +29,13 @@ typedef struct LocationLen * of squashed constants. */ bool squashed; + + /* + * Indicates whether a location is that of an external parameter, so it + * can be decided during normalization whether the parameter number should + * be replaced or kept as is. + */ + bool extern_param; } LocationLen; /* @@ -52,8 +59,15 @@ typedef struct JumbleState /* Current number of valid entries in clocations array */ int clocations_count; - /* highest Param id we've seen, in order to start normalization correctly */ + /* + * Highest Param id we've seen, in order to start normalization correctly. + * However, if the jumble contains at least one squashed list, we + * disregard the highest_extern_param_id value because parameters can + * exist within the squashed list and are no longer considered for + * normalization. + */ int highest_extern_param_id; + bool has_squashed_lists; /* * Count of the number of NULL nodes seen since last appending a value. -- 2.39.5