> On Thu, May 08, 2025 at 02:22:00PM GMT, Michael Paquier wrote: > On Wed, May 07, 2025 at 10:41:22AM +0200, Dmitry Dolgov wrote: > > Ah, I see what you mean. I think the idea is fine, it will simplify > > certain things as well as address the issue. But I'm afraid adding > > start/end location to A_Expr is a bit too invasive, as it's being used > > for many other purposes. How about introducing a new expression for this > > purposes, and use it only in in_expr/array_expr, and wrap the > > corresponding expressions into it? This way the change could be applied > > in a more targeted fashion. > > Yes, that feels invasive. The use of two static variables to track > the start and the end positions in an expression list can also be a > bit unstable to rely on, I think. It seems to me that this part > could be handled in a new Node that takes care of tracking the two > positions, instead, be it a start/end couple or a location/length > couple? That seems necessary to have when going through > jumbleElements().
To clarify, I had in mind something like in the attached patch. The idea is to make start/end location capturing relatively independent from the constants squashing. The new parsing node conveys the location information, which is then getting transformed to be a part of an ArrayExpr. It's done for in_expr only here, something similar would be needed for array_expr as well. Feedback is appreciated.
>From bcab1a8364979dce146b36e31865e9b670bd892b Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthali...@gmail.com> Date: Thu, 8 May 2025 16:41:01 +0200 Subject: [PATCH v1 1/2] Introduce LocationExpr Add LocationExpr wrapper node to capture start and end location of an expression in a query. Use it in to wrap expr_list in in_expr and convery location information to ArrayExpr. --- src/backend/nodes/nodeFuncs.c | 36 ++++++++++++++++++++++++++++++++ src/backend/parser/gram.y | 11 +++++++++- src/backend/parser/parse_expr.c | 28 ++++++++++++++++++++++++- src/include/nodes/parsenodes.h | 15 +++++++++++++ src/include/nodes/primnodes.h | 2 ++ src/tools/pgindent/typedefs.list | 1 + 6 files changed, 91 insertions(+), 2 deletions(-) diff --git a/src/backend/nodes/nodeFuncs.c b/src/backend/nodes/nodeFuncs.c index 7bc823507f1..6f6a7079d55 100644 --- a/src/backend/nodes/nodeFuncs.c +++ b/src/backend/nodes/nodeFuncs.c @@ -284,6 +284,12 @@ exprType(const Node *expr) case T_PlaceHolderVar: type = exprType((Node *) ((const PlaceHolderVar *) expr)->phexpr); break; + case T_LocationExpr: + { + const LocationExpr *n = (const LocationExpr *) expr; + type = exprType((Node *) n->expr); + } + break; default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(expr)); type = InvalidOid; /* keep compiler quiet */ @@ -536,6 +542,11 @@ exprTypmod(const Node *expr) return exprTypmod((Node *) ((const ReturningExpr *) expr)->retexpr); case T_PlaceHolderVar: return exprTypmod((Node *) ((const PlaceHolderVar *) expr)->phexpr); + case T_LocationExpr: + { + const LocationExpr *n = (const LocationExpr *) expr; + return exprTypmod((Node *) n->expr); + } default: break; } @@ -1058,6 +1069,9 @@ exprCollation(const Node *expr) case T_PlaceHolderVar: coll = exprCollation((Node *) ((const PlaceHolderVar *) expr)->phexpr); break; + case T_LocationExpr: + coll = exprCollation((Node *) ((const LocationExpr *) expr)->expr); + break; default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(expr)); coll = InvalidOid; /* keep compiler quiet */ @@ -1306,6 +1320,10 @@ exprSetCollation(Node *expr, Oid collation) /* NextValueExpr's result is an integer type ... */ Assert(!OidIsValid(collation)); /* ... so never set a collation */ break; + case T_LocationExpr: + exprSetCollation((Node *) ((LocationExpr *) expr)->expr, + collation); + break; default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(expr)); break; @@ -1803,6 +1821,9 @@ exprLocation(const Node *expr) case T_PartitionRangeDatum: loc = ((const PartitionRangeDatum *) expr)->location; break; + case T_LocationExpr: + loc = ((const LocationExpr *) expr)->start_location; + break; default: /* for any other node type it's just unknown... */ loc = -1; @@ -2668,6 +2689,8 @@ expression_tree_walker_impl(Node *node, return true; } break; + case T_LocationExpr: + return WALK(((LocationExpr *) node)->expr); default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); @@ -3744,6 +3767,17 @@ expression_tree_mutator_impl(Node *node, return (Node *) newnode; } break; + case T_LocationExpr: + { + LocationExpr *expr = (LocationExpr *) node; + LocationExpr *newnode; + + FLATCOPY(newnode, expr, LocationExpr); + MUTATE(newnode->expr, expr->expr, Node *); + + return (Node *) newnode; + } + break; default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); @@ -4705,6 +4739,8 @@ raw_expression_tree_walker_impl(Node *node, return true; } break; + case T_LocationExpr: + return WALK(((LocationExpr *) node)->expr); default: elog(ERROR, "unrecognized node type: %d", (int) nodeTag(node)); diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index 3c4268b271a..3ffa5335f4e 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -16895,7 +16895,16 @@ in_expr: select_with_parens /* other fields will be filled later */ $$ = (Node *) n; } - | '(' expr_list ')' { $$ = (Node *) $2; } + | '(' expr_list ')' + { + LocationExpr *n = makeNode(LocationExpr); + + n->expr = (Node *) $2; + n->start_location = @1 + 1; + n->end_location = @3 - 1; + + $$ = (Node *) n; + } ; /* diff --git a/src/backend/parser/parse_expr.c b/src/backend/parser/parse_expr.c index 1f8e2d54673..30d52c801eb 100644 --- a/src/backend/parser/parse_expr.c +++ b/src/backend/parser/parse_expr.c @@ -370,6 +370,22 @@ transformExprRecurse(ParseState *pstate, Node *expr) result = transformJsonFuncExpr(pstate, (JsonFuncExpr *) expr); break; + case T_LocationExpr: + { + LocationExpr *loc = (LocationExpr *) expr; + if (IsA(loc->expr, ArrayExpr)) + { + ArrayExpr *arr = (ArrayExpr *) loc->expr; + arr->loc_range = list_make2_int(loc->start_location, + loc->end_location); + + result = (Node *) arr; + } + else + result = (Node *) loc->expr; + } + break; + default: /* should not reach here */ elog(ERROR, "unrecognized node type: %d", (int) nodeTag(expr)); @@ -1125,6 +1141,7 @@ transformAExprIn(ParseState *pstate, A_Expr *a) { Node *result = NULL; Node *lexpr; + LocationExpr *location = NULL; List *rexprs; List *rvars; List *rnonvars; @@ -1139,6 +1156,9 @@ transformAExprIn(ParseState *pstate, A_Expr *a) else useOr = true; + if (IsA(a->rexpr, LocationExpr)) + location = (LocationExpr *) a->rexpr; + /* * We try to generate a ScalarArrayOpExpr from IN/NOT IN, but this is only * possible if there is a suitable array type available. If not, we fall @@ -1152,7 +1172,7 @@ transformAExprIn(ParseState *pstate, A_Expr *a) */ lexpr = transformExprRecurse(pstate, a->lexpr); rexprs = rvars = rnonvars = NIL; - foreach(l, (List *) a->rexpr) + foreach(l, (List *) transformExprRecurse(pstate, a->rexpr)) { Node *rexpr = transformExprRecurse(pstate, lfirst(l)); @@ -1224,6 +1244,12 @@ transformAExprIn(ParseState *pstate, A_Expr *a) newa->elements = aexprs; newa->multidims = false; newa->location = -1; + if (location) + newa->loc_range = list_make2_int(location->start_location, + location->end_location); + else + newa->loc_range = NIL; + result = (Node *) make_scalar_array_op(pstate, a->name, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h index 4610fc61293..5107bfad9a6 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -504,6 +504,21 @@ typedef struct A_ArrayExpr ParseLoc location; /* token location, or -1 if unknown */ } A_ArrayExpr; +/* + * A wrapper expression to record start and end location + */ +typedef struct LocationExpr +{ + NodeTag type; + + /* the node to be wrapped */ + Node *expr; + /* token location, or -1 if unknown */ + ParseLoc start_location; + /* token location, or -1 if unknown */ + ParseLoc end_location; +} LocationExpr; + /* * ResTarget - * result target (used in target list of pre-transformed parse trees) diff --git a/src/include/nodes/primnodes.h b/src/include/nodes/primnodes.h index 7d3b4198f26..ffee0f7768f 100644 --- a/src/include/nodes/primnodes.h +++ b/src/include/nodes/primnodes.h @@ -1399,6 +1399,8 @@ typedef struct ArrayExpr bool multidims pg_node_attr(query_jumble_ignore); /* token location, or -1 if unknown */ ParseLoc location; + + List *loc_range pg_node_attr(query_jumble_ignore); } ArrayExpr; /* diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index e5879e00dff..e6fcba24396 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -1501,6 +1501,7 @@ LOCALLOCK LOCALLOCKOWNER LOCALLOCKTAG LOCALPREDICATELOCK +LocationExpr LOCK LOCKMASK LOCKMETHODID base-commit: b0635bfda0535a7fc36cd11d10eecec4e2a96330 -- 2.45.1
>From b4d325d11d1ec6912a4bb07c4f3ff2ce079212b8 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthali...@gmail.com> Date: Thu, 8 May 2025 16:41:20 +0200 Subject: [PATCH v1 2/2] Use LocationExpr in squashing For the purpose of constants squashing we have only start location of an expression, which is not enouch if the constant is wrapped e.g. in a cast function. Apply information conveyed via LocationExpr to improve squashing of constants. Based on an idea from Sami Imseih. --- .../pg_stat_statements/expected/squashing.out | 42 +++++++--- .../pg_stat_statements/pg_stat_statements.c | 35 +++------ contrib/pg_stat_statements/sql/squashing.sql | 8 +- src/backend/nodes/queryjumblefuncs.c | 76 ++++++++++++------- 4 files changed, 97 insertions(+), 64 deletions(-) diff --git a/contrib/pg_stat_statements/expected/squashing.out b/contrib/pg_stat_statements/expected/squashing.out index 7b138af098c..df9ff9d5637 100644 --- a/contrib/pg_stat_statements/expected/squashing.out +++ b/contrib/pg_stat_statements/expected/squashing.out @@ -147,6 +147,24 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (3 rows) +-- Parsing +SELECT pg_stat_statements_reset() IS NOT NULL AS t; + t +--- + t +(1 row) + +SELECT WHERE 1 IN (1, 2, int4(1)); +-- +(1 row) + +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + query | calls +----------------------------------------------------+------- + SELECT WHERE $1 IN ($2 /*, ... */) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 +(2 rows) + -- FuncExpr -- Verify multiple type representation end up with the same query_id CREATE TABLE test_float (data float); @@ -246,7 +264,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_bigint WHERE data IN +| 1 - ($1 /*, ... */::bigint) | + ($1 /*, ... */) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -353,7 +371,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_cast WHERE data IN +| 1 - ($1 /*, ... */::int4::casttesttype) | + ($1 /*, ... */) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -376,7 +394,7 @@ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; query | calls ----------------------------------------------------+------- SELECT * FROM test_squash_jsonb WHERE data IN +| 1 - (($1 /*, ... */)::jsonb) | + ($1 /*, ... */) | SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) @@ -393,10 +411,10 @@ SELECT * FROM test_squash WHERE id IN (1::oid, 2::oid, 3::oid, 4::oid, 5::oid, 6 (0 rows) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls -------------------------------------------------------------+------- - SELECT * FROM test_squash WHERE id IN ($1 /*, ... */::oid) | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +-------------------------------------------------------+------- + SELECT * FROM test_squash WHERE id IN ($1 /*, ... */) | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) -- Test constants evaluation in a CTE, which was causing issues in the past @@ -409,7 +427,7 @@ FROM cte; -------- (0 rows) --- Simple array would be squashed as well +-- Simple array is not squashed yet SELECT pg_stat_statements_reset() IS NOT NULL AS t; t --- @@ -423,9 +441,9 @@ SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; (1 row) SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; - query | calls -----------------------------------------------------+------- - SELECT ARRAY[$1 /*, ... */] | 1 - SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 + query | calls +-------------------------------------------------------+------- + SELECT ARRAY[$1, $2, $3, $4, $5, $6, $7, $8, $9, $10] | 1 + SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1 (2 rows) diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 9778407cba3..314e065b364 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -2825,10 +2825,6 @@ generate_normalized_query(JumbleState *jstate, const char *query, n_quer_loc = 0, /* Normalized query byte location */ last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ - bool in_squashed = false; /* in a run of squashed consts? */ - int skipped_constants = 0; /* Position adjustment of later - * constants after squashed ones */ - /* * Get constants' lengths (core system only gives us locations). Note @@ -2886,12 +2882,9 @@ generate_normalized_query(JumbleState *jstate, const char *query, /* ... and then a param symbol replacing the constant itself */ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d", - i + 1 + jstate->highest_extern_param_id - skipped_constants); - - /* In case previous constants were merged away, stop doing that */ - in_squashed = false; + i + 1 + jstate->highest_extern_param_id); } - else if (!in_squashed) + else { /* * This location is the start position of a run of constants to be @@ -2903,27 +2896,12 @@ generate_normalized_query(JumbleState *jstate, const char *query, len_to_wrt = off - last_off; len_to_wrt -= last_tok_len; Assert(len_to_wrt >= 0); - Assert(i + 1 < jstate->clocations_count); - Assert(jstate->clocations[i + 1].squashed); memcpy(norm_query + n_quer_loc, query + quer_loc, len_to_wrt); n_quer_loc += len_to_wrt; /* ... and then start a run of squashed constants */ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d /*, ... */", - i + 1 + jstate->highest_extern_param_id - skipped_constants); - - /* The next location will match the block below, to end the run */ - in_squashed = true; - - skipped_constants++; - } - else - { - /* - * The second location of a run of squashable elements; this - * indicates its end. - */ - in_squashed = false; + i + 1 + jstate->highest_extern_param_id); } /* Otherwise the constant is squashed away -- move forward */ @@ -3012,6 +2990,13 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query, int loc = locs[i].location; int tok; + /* Squashed constants are recorded with a length set already */ + if (locs[i].squashed) + { + Assert(locs[i].length != -1); + continue; + } + /* Adjust recorded location if we're dealing with partial string */ loc -= query_loc; diff --git a/contrib/pg_stat_statements/sql/squashing.sql b/contrib/pg_stat_statements/sql/squashing.sql index 908be81ff2b..d30541a275c 100644 --- a/contrib/pg_stat_statements/sql/squashing.sql +++ b/contrib/pg_stat_statements/sql/squashing.sql @@ -49,6 +49,12 @@ SELECT * FROM test_squash WHERE id IN (@ '-1', @ '-2', @ '-3', @ '-4', @ '-5', @ '-6', @ '-7', @ '-8', @ '-9'); SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; +-- Parsing + +SELECT pg_stat_statements_reset() IS NOT NULL AS t; +SELECT WHERE 1 IN (1, 2, int4(1)); +SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; + -- FuncExpr -- Verify multiple type representation end up with the same query_id @@ -163,7 +169,7 @@ WITH cte AS ( SELECT ARRAY['a', 'b', 'c', const::varchar] AS result FROM cte; --- Simple array would be squashed as well +-- Simple array is not squashed yet SELECT pg_stat_statements_reset() IS NOT NULL AS t; SELECT ARRAY[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C"; diff --git a/src/backend/nodes/queryjumblefuncs.c b/src/backend/nodes/queryjumblefuncs.c index d1e82a63f09..ae26406bbbc 100644 --- a/src/backend/nodes/queryjumblefuncs.c +++ b/src/backend/nodes/queryjumblefuncs.c @@ -62,8 +62,10 @@ static void AppendJumble(JumbleState *jstate, static void FlushPendingNulls(JumbleState *jstate); static void RecordConstLocation(JumbleState *jstate, int location, bool squashed); +static void RecordConstLocationRange(JumbleState *jstate, + int start, int end, bool squashed); static void _jumbleNode(JumbleState *jstate, Node *node); -static void _jumbleElements(JumbleState *jstate, List *elements); +static void _jumbleElements(JumbleState *jstate, List *elements, Node *expr); static void _jumbleA_Const(JumbleState *jstate, Node *node); static void _jumbleList(JumbleState *jstate, Node *node); static void _jumbleVariableSetStmt(JumbleState *jstate, Node *node); @@ -403,6 +405,32 @@ RecordConstLocation(JumbleState *jstate, int location, bool squashed) } } +/* + * Similar to RecordConstLocation, RecordConstLocationRange stores a constant + * with location start and end boundaries. + */ +static void +RecordConstLocationRange(JumbleState *jstate, int start, int end, bool squashed) +{ + /* -1 indicates unknown or undefined location */ + if (start >= 0 && end >= 0) + { + /* enlarge array if needed */ + if (jstate->clocations_count >= jstate->clocations_buf_size) + { + jstate->clocations_buf_size *= 2; + jstate->clocations = (LocationLen *) + repalloc(jstate->clocations, + jstate->clocations_buf_size * + sizeof(LocationLen)); + } + jstate->clocations[jstate->clocations_count].location = start; + jstate->clocations[jstate->clocations_count].squashed = squashed; + jstate->clocations[jstate->clocations_count].length = end - start + 1; + jstate->clocations_count++; + } +} + /* * Subroutine for _jumbleElements: Verify a few simple cases where we can * deduce that the expression is a constant: @@ -461,7 +489,7 @@ IsSquashableConst(Node *element) * expressions. */ static bool -IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) +IsSquashableConstList(List *elements) { ListCell *temp; @@ -473,13 +501,8 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) return false; foreach(temp, elements) - { if (!IsSquashableConst(lfirst(temp))) return false; - } - - *firstExpr = linitial(elements); - *lastExpr = llast(elements); return true; } @@ -487,7 +510,7 @@ IsSquashableConstList(List *elements, Node **firstExpr, Node **lastExpr) #define JUMBLE_NODE(item) \ _jumbleNode(jstate, (Node *) expr->item) #define JUMBLE_ELEMENTS(list) \ - _jumbleElements(jstate, (List *) expr->list) + _jumbleElements(jstate, (List *) expr->list, (Node *) expr) #define JUMBLE_LOCATION(location) \ RecordConstLocation(jstate, expr->location, false) #define JUMBLE_FIELD(item) \ @@ -523,28 +546,29 @@ do { \ * elements in the list. */ static void -_jumbleElements(JumbleState *jstate, List *elements) +_jumbleElements(JumbleState *jstate, List *elements, Node *expr) { - Node *first, - *last; - - if (IsSquashableConstList(elements, &first, &last)) + if (IsSquashableConstList(elements)) { + ArrayExpr *array; + /* - * If this list of elements is squashable, keep track of the location - * of its first and last elements. When reading back the locations - * array, we'll see two consecutive locations with ->squashed set to - * true, indicating the location of initial and final elements of this - * list. - * - * For the limited set of cases we support now (implicit coerce via - * FuncExpr, Const) it's fine to use exprLocation of the 'last' - * expression, but if more complex composite expressions are to be - * supported (e.g., OpExpr or FuncExpr as an explicit call), more - * sophisticated tracking will be needed. + * Currenlty only ArrayExpr provides location information, needed for + * squashing. */ - RecordConstLocation(jstate, exprLocation(first), true); - RecordConstLocation(jstate, exprLocation(last), true); + Assert(IsA(expr, ArrayExpr)); + array = (ArrayExpr *) expr; + + /* + * If the parent ArrayExpr has location information, i.e. start and the + * end of the expression, use it as boundaries for squashing. + */ + if (array->loc_range != NIL) + RecordConstLocationRange(jstate, + linitial_int(array->loc_range), + lsecond_int(array->loc_range), true); + else + _jumbleNode(jstate, (Node *) elements); } else { -- 2.45.1