On Thu, Mar 26, 2026 at 06:50:20PM -0700, Lukas Fittl wrote: > On Thu, Mar 26, 2026 at 10:19 AM Sami Imseih <[email protected]> wrote: > > Both of those changes belong in 0002. See the attached v7. > > Looks good! > > I've also done a quick follow-up test with pg_tracing and that > simplifies its logic as expected [0] to be able to extract inline > parameter values.
I have been looking at what you have here. As mentioned upthread, I am on board with aiming for JumbleState to be a const so as we enforce as policy that no extension is allowed to touch what the core code has computed. However, I am not convinced by most of the patch. The logic to recompute the lengths of the constants is a concept proper to PGSS. Perhaps we could reconsider moving that into core at some point, but I am honestly not sure to see the range of benefits we'd gain from that. This line of arguments is stronger for the normalization of the query string. Why should the core code decide what a normalized string should look like when it comes to the detection of the constants, if any? Instead of a dollar-quoted number, we could enforce a bunch of things, like a '?' or a '$woozah$' at these locations. Saying that, the key point of the patch is to take a copy of the JumbleState locations, and recompute it for the needs of PGSS for the sake of the normalized query representation. Hence, why don't we just do that at the end? That should be enough to enforce the const marker for the JumbleState across all the loaded extensions that want to look at it. This leads me to the simpler patch attached. Comments or tomatoes? That's simpler, and I'd be OK with just that for v19. That would be much better than the current state of affairs, where PGSS decides to enforce its own ideas in the JumbleState, ideas fed to anything looping into the post-parse-analyze hook after PGSS. -- Michael
From 336e6aa38d6dc3901c3fa1f746796a0b8fc87cd2 Mon Sep 17 00:00:00 2001 From: Michael Paquier <[email protected]> Date: Fri, 27 Mar 2026 14:13:00 +0900 Subject: [PATCH v8] Make JumbleState const in post_parse_analyze hook Change the post_parse_analyze_hook_type signature to take a const JumbleState parameter, preventing hooks from modifying the jumble state during query analysis. This improves API safety by making it clear that hooks should only read from the jumble state, not modify it. Update pg_stat_statements and related functions to match the new const signature. Refactor fill_in_constant_lengths() to return a newly allocated LocationLen array instead of modifying the JumbleState, so as PGSS does not touch the now-const JumbleState. The routine is renamed to compute_constant_lengths(). Discussion: https://postgr.es/m/caa5rz0tzp5qu0ikzeeqjnxvdsngh1dwv80sb-k4qaumimoo...@mail.gmail.com --- src/include/parser/analyze.h | 2 +- .../pg_stat_statements/pg_stat_statements.c | 75 ++++++++++++------- 2 files changed, 50 insertions(+), 27 deletions(-) diff --git a/src/include/parser/analyze.h b/src/include/parser/analyze.h index e10270ff0ffd..b2db6fa4e8c4 100644 --- a/src/include/parser/analyze.h +++ b/src/include/parser/analyze.h @@ -21,7 +21,7 @@ /* Hook for plugins to get control at end of parse analysis */ typedef void (*post_parse_analyze_hook_type) (ParseState *pstate, Query *query, - JumbleState *jstate); + const JumbleState *jstate); extern PGDLLIMPORT post_parse_analyze_hook_type post_parse_analyze_hook; diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 6cb14824ec3b..0b6be5e255a4 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -335,7 +335,7 @@ static void pgss_shmem_request(void); static void pgss_shmem_startup(void); static void pgss_shmem_shutdown(int code, Datum arg); static void pgss_post_parse_analyze(ParseState *pstate, Query *query, - JumbleState *jstate); + const JumbleState *jstate); static PlannedStmt *pgss_planner(Query *parse, const char *query_string, int cursorOptions, @@ -359,7 +359,7 @@ static void pgss_store(const char *query, int64 queryId, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, - JumbleState *jstate, + const JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin); @@ -378,13 +378,14 @@ static char *qtext_fetch(Size query_offset, int query_len, static bool need_gc_qtexts(void); static void gc_qtexts(void); static TimestampTz entry_reset(Oid userid, Oid dbid, int64 queryid, bool minmax_only); -static char *generate_normalized_query(JumbleState *jstate, const char *query, +static char *generate_normalized_query(const JumbleState *jstate, + const char *query, int query_loc, int *query_len_p); -static void fill_in_constant_lengths(JumbleState *jstate, const char *query, - int query_loc); +static LocationLen *compute_constant_lengths(const JumbleState *jstate, + const char *query, + int query_loc); static int comp_location(const void *a, const void *b); - /* * Module load callback */ @@ -840,7 +841,7 @@ error: * Post-parse-analysis hook: mark query with a queryId */ static void -pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate) +pgss_post_parse_analyze(ParseState *pstate, Query *query, const JumbleState *jstate) { if (prev_post_parse_analyze_hook) prev_post_parse_analyze_hook(pstate, query, jstate); @@ -1297,7 +1298,7 @@ pgss_store(const char *query, int64 queryId, const BufferUsage *bufusage, const WalUsage *walusage, const struct JitInstrumentation *jitusage, - JumbleState *jstate, + const JumbleState *jstate, int parallel_workers_to_launch, int parallel_workers_launched, PlannedStmtOrigin planOrigin) @@ -2845,7 +2846,7 @@ release_lock: * Returns a palloc'd string. */ static char * -generate_normalized_query(JumbleState *jstate, const char *query, +generate_normalized_query(const JumbleState *jstate, const char *query, int query_loc, int *query_len_p) { char *norm_query; @@ -2857,12 +2858,14 @@ generate_normalized_query(JumbleState *jstate, const char *query, last_off = 0, /* Offset from start for previous tok */ last_tok_len = 0; /* Length (in bytes) of that tok */ int num_constants_replaced = 0; + LocationLen *locs = NULL; /* - * Get constants' lengths (core system only gives us locations). Note - * this also ensures the items are sorted by location. + * Determine constants' lengths (core system only gives us locations), + * and return a sorted copy of jstate's LocationLen data with lengths + * filled in. */ - fill_in_constant_lengths(jstate, query, query_loc); + locs = compute_constant_lengths(jstate, query, query_loc); /* * Allow for $n symbols to be longer than the constants they replace. @@ -2888,15 +2891,15 @@ generate_normalized_query(JumbleState *jstate, const char *query, * the parameter in the next iteration (or after the loop is done), * which is a bit odd but seems to work okay in most cases. */ - if (jstate->clocations[i].extern_param && !jstate->has_squashed_lists) + if (locs[i].extern_param && !jstate->has_squashed_lists) continue; - off = jstate->clocations[i].location; + off = locs[i].location; /* Adjust recorded location if we're dealing with partial string */ off -= query_loc; - tok_len = jstate->clocations[i].length; + tok_len = locs[i].length; if (tok_len < 0) continue; /* ignore any duplicates */ @@ -2915,7 +2918,7 @@ generate_normalized_query(JumbleState *jstate, const char *query, */ n_quer_loc += sprintf(norm_query + n_quer_loc, "$%d%s", num_constants_replaced + 1 + jstate->highest_extern_param_id, - jstate->clocations[i].squashed ? " /*, ... */" : ""); + locs[i].squashed ? " /*, ... */" : ""); num_constants_replaced++; /* move forward */ @@ -2924,6 +2927,10 @@ generate_normalized_query(JumbleState *jstate, const char *query, last_tok_len = tok_len; } + /* Clean up, if needed */ + if (locs) + pfree(locs); + /* * We've copied up until the last ignorable constant. Copy over the * remaining bytes of the original query string. @@ -2942,8 +2949,9 @@ generate_normalized_query(JumbleState *jstate, const char *query, } /* - * Given a valid SQL string and an array of constant-location records, - * fill in the textual lengths of those constants. + * Given a valid SQL string and an array of constant-location records, return + * the textual lengths of those constants in a newly allocated LocationLen + * array, or NULL if there are no constants. * * The constants may use any allowed constant syntax, such as float literals, * bit-strings, single-quoted strings and dollar-quoted strings. This is @@ -2959,16 +2967,19 @@ generate_normalized_query(JumbleState *jstate, const char *query, * past the first to -1 so that they can later be ignored. * * If query_loc > 0, then "query" has been advanced by that much compared to - * the original string start, so we need to translate the provided locations - * to compensate. (This lets us avoid re-scanning statements before the one - * of interest, so it's worth doing.) + * the original string start, as is the case with multi-statement strings, so + * we need to translate the provided locations to compensate. (This lets us + * avoid re-scanning statements before the one of interest, so it's worth + * doing.) * * N.B. There is an assumption that a '-' character at a Const location begins * a negative numeric constant. This precludes there ever being another * reason for a constant to start with a '-'. + * + * It is the caller's responsibility to free the result, if necessary. */ -static void -fill_in_constant_lengths(JumbleState *jstate, const char *query, +static LocationLen * +compute_constant_lengths(const JumbleState *jstate, const char *query, int query_loc) { LocationLen *locs; @@ -2977,14 +2988,20 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query, core_YYSTYPE yylval; YYLTYPE yylloc; + if (jstate->clocations_count == 0) + return NULL; + + /* Copy constant locations to avoid modifying jstate */ + locs = palloc_array(LocationLen, jstate->clocations_count); + memcpy(locs, jstate->clocations, jstate->clocations_count * sizeof(LocationLen)); + /* * Sort the records by location so that we can process them in order while * scanning the query text. */ if (jstate->clocations_count > 1) - qsort(jstate->clocations, jstate->clocations_count, + qsort(locs, jstate->clocations_count, sizeof(LocationLen), comp_location); - locs = jstate->clocations; /* initialize the flex scanner --- should match raw_parser() */ yyscanner = scanner_init(query, @@ -3008,7 +3025,11 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query, if (locs[i].squashed) continue; /* squashable list, ignore */ - /* Adjust recorded location if we're dealing with partial string */ + /* + * Adjust the constant's location using the provided starting location + * of the current statement. This allows us to avoid scanning a + * multi-statement string from the beginning. + */ loc = locs[i].location - query_loc; Assert(loc >= 0); @@ -3064,6 +3085,8 @@ fill_in_constant_lengths(JumbleState *jstate, const char *query, } scanner_finish(yyscanner); + + return locs; } /* -- 2.53.0
signature.asc
Description: PGP signature
