Hi hackers, Aleksandra Bondar and I are proposing the following patch for pg_stat_statements.
The idea. --------- Currently, to get statistics on a specific query, you should execute SELECT * FROM pg_stat_statements WHERE queryid = specific_queryid; This takes a long time because the pg_stat_statements() function forms tuples for all statistics it has first, and then they are filtered by the WHERE clause. If we provide a function like pg_stat_statements_by_queryid(queryid bigint), which would filter statistics by queryid while scanning pgss_hash and return only statistics with the specified queryid, that would be much faster. We can also easily add filtration by userid and dbid, which would lead us to a function like pg_stat_statements_filtered(queryid bigint, userid Oid, dbid Oid). In case some parameter is not specified, its default value is 0, and it means that no filtration is needed on this parameter. Kind of like pg_stat_statements_reset() chooses what statistics should be cleaned. If no parameter is specified, pg_stat_statements_filtered() should return all statistics that pg_stat_statements() would return. This led me to the idea that we should rather extend the pg_stat_statements() function than add a new function. The old way to call pg_stat_statements() will produce the same results, and specifying new parameters will produce filtered results. The patch. ---------- The extended pg_stat_statements() function idea is implemented in the patch attached. I can always rewrite the patch to add a new function and leave pg_stat_statements() as it is, though, if you think it's better to have a separate function for filtering. We've only written the code so far and want to get your opinion on that. If you like the idea, we'll also provide tests and docs. Any suggestions are welcome. Benchmarking. ------------- We prepared a simple test case here to show performance improvement. Download the attached script pg_stat_statements_prepare.sql and run the following in psql. CREATE EXTENSION pg_stat_statements; -- Fill in pg_stat_statements statistics \i /path/to/pgpro_stats_prepare_script.sql -- Get random query ID SELECT queryid AS rand_queryid FROM pg_stat_statements WHERE queryid IS NOT NULL ORDER BY random() LIMIT 1 \gset -- Turn on time measuring \timing -- Get statistics in the old way SELECT * FROM pg_stat_statements WHERE queryid = :rand_queryid; -- Get statistics in the new way SELECT * FROM pg_stat_statements(true, queryid => :rand_queryid); I'm getting that the new way is at least two times faster on my machine. I also compared the time for the old way on master with and without the patch. I get that the difference is within standard deviation. Best regards, Karina Litskevich Postgres Professional: http://postgrespro.com/
From 1e4f15b9adbfeeef68a3810b1a325dc656549e16 Mon Sep 17 00:00:00 2001 From: Karina Litskevich <[email protected]> Date: Wed, 3 Sep 2025 11:44:26 +0300 Subject: [PATCH v1] pg_stat_statements: add ability to filter statistics while sacnning pgss_hash Author: Aleksandra Bondar Author: Karina Litskevich --- .../pg_stat_statements--1.12--1.13.sql | 3 + .../pg_stat_statements/pg_stat_statements.c | 61 +++++++++++++++---- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql b/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql index 2f0eaf14ec3..613835f8c99 100644 --- a/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql +++ b/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql @@ -13,6 +13,9 @@ DROP FUNCTION pg_stat_statements(boolean); /* Now redefine */ CREATE FUNCTION pg_stat_statements(IN showtext boolean, + IN userid oid DEFAULT 0, + IN dbid oid DEFAULT 0, + IN queryid bigint DEFAULT 0, OUT userid oid, OUT dbid oid, OUT toplevel bool, diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c index 0bb0f933399..ba25a477b2c 100644 --- a/contrib/pg_stat_statements/pg_stat_statements.c +++ b/contrib/pg_stat_statements/pg_stat_statements.c @@ -148,6 +148,19 @@ typedef struct pgssHashKey bool toplevel; /* query executed at top level */ } pgssHashKey; +/* + * Values to filter hashtable entries by. If a field is 0, this means no filtring + * is needed by this identifier. + */ +typedef struct pgssFilter +{ + Oid userid; /* user OID */ + Oid dbid; /* database OID */ + int64 queryid; /* query identifier */ +} pgssFilter; + +#define NO_FILTERING (&(pgssFilter) {0}) + /* * The actual stats counters kept within pgssEntry. */ @@ -362,7 +375,8 @@ static void pgss_store(const char *query, int64 queryId, PlannedStmtOrigin planOrigin); static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, - bool showtext); + bool showtext, + const pgssFilter *filter); static Size pgss_memsize(void); static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len, int encoding, bool sticky); @@ -1594,8 +1608,12 @@ Datum pg_stat_statements_1_13(PG_FUNCTION_ARGS) { bool showtext = PG_GETARG_BOOL(0); + Oid userid = PG_GETARG_OID(1); + Oid dbid = PG_GETARG_OID(2); + int64 queryid = PG_GETARG_INT64(3); + pgssFilter filter = {userid, dbid, queryid}; - pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext); + pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext, &filter); return (Datum) 0; } @@ -1605,7 +1623,7 @@ pg_stat_statements_1_12(PG_FUNCTION_ARGS) { bool showtext = PG_GETARG_BOOL(0); - pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext); + pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext, NO_FILTERING); return (Datum) 0; } @@ -1615,7 +1633,7 @@ pg_stat_statements_1_11(PG_FUNCTION_ARGS) { bool showtext = PG_GETARG_BOOL(0); - pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext); + pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext, NO_FILTERING); return (Datum) 0; } @@ -1625,7 +1643,7 @@ pg_stat_statements_1_10(PG_FUNCTION_ARGS) { bool showtext = PG_GETARG_BOOL(0); - pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext); + pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext, NO_FILTERING); return (Datum) 0; } @@ -1635,7 +1653,7 @@ pg_stat_statements_1_9(PG_FUNCTION_ARGS) { bool showtext = PG_GETARG_BOOL(0); - pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext); + pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext, NO_FILTERING); return (Datum) 0; } @@ -1645,7 +1663,7 @@ pg_stat_statements_1_8(PG_FUNCTION_ARGS) { bool showtext = PG_GETARG_BOOL(0); - pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext); + pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext, NO_FILTERING); return (Datum) 0; } @@ -1655,7 +1673,7 @@ pg_stat_statements_1_3(PG_FUNCTION_ARGS) { bool showtext = PG_GETARG_BOOL(0); - pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext); + pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext, NO_FILTERING); return (Datum) 0; } @@ -1665,7 +1683,7 @@ pg_stat_statements_1_2(PG_FUNCTION_ARGS) { bool showtext = PG_GETARG_BOOL(0); - pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext); + pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext, NO_FILTERING); return (Datum) 0; } @@ -1678,16 +1696,34 @@ Datum pg_stat_statements(PG_FUNCTION_ARGS) { /* If it's really API 1.1, we'll figure that out below */ - pg_stat_statements_internal(fcinfo, PGSS_V1_0, true); + pg_stat_statements_internal(fcinfo, PGSS_V1_0, true, NO_FILTERING); return (Datum) 0; } +static inline bool +entry_matches_filter(const pgssEntry *entry, const pgssFilter *filter) +{ + Assert(filter != NULL); + + if (filter->queryid != 0 && entry->key.queryid != filter->queryid) + return false; + + if (filter->userid != 0 && entry->key.userid != filter->userid) + return false; + + if (filter->dbid != 0 && entry->key.dbid != filter->dbid) + return false; + + return true; +} + /* Common code for all versions of pg_stat_statements() */ static void pg_stat_statements_internal(FunctionCallInfo fcinfo, pgssVersion api_version, - bool showtext) + bool showtext, + const pgssFilter *filter) { ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; Oid userid = GetUserId(); @@ -1838,6 +1874,9 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo, TimestampTz stats_since; TimestampTz minmax_stats_since; + if (!entry_matches_filter(entry, filter)) + continue; + memset(values, 0, sizeof(values)); memset(nulls, 0, sizeof(nulls)); -- 2.34.1
pg_stat_statements_prepare.sql
Description: application/sql
