Hi hackers,

Aleksandra Bondar and I are proposing the following patch for
pg_stat_statements.


The idea.
---------

Currently, to get statistics on a specific query, you should execute

SELECT * FROM pg_stat_statements WHERE queryid = specific_queryid;

This takes a long time because the pg_stat_statements() function forms
tuples for all statistics it has first, and then they are filtered by
the WHERE clause. If we provide a function like
pg_stat_statements_by_queryid(queryid bigint), which would filter
statistics by queryid while scanning pgss_hash and return only
statistics with the specified queryid, that would be much faster.

We can also easily add filtration by userid and dbid, which would lead
us to a function like
pg_stat_statements_filtered(queryid bigint, userid Oid, dbid Oid). In
case some parameter is not specified, its default value is 0, and it
means that no filtration is needed on this parameter. Kind of like
pg_stat_statements_reset() chooses what statistics should be cleaned.
If no parameter is specified, pg_stat_statements_filtered() should
return all statistics that pg_stat_statements() would return.

This led me to the idea that we should rather extend the
pg_stat_statements() function than add a new function. The old way to
call pg_stat_statements() will produce the same results, and
specifying new parameters will produce filtered results.


The patch.
----------

The extended pg_stat_statements() function idea is implemented in the
patch attached. I can always rewrite the patch to add a new function
and leave pg_stat_statements() as it is, though, if you think it's
better to have a separate function for filtering.

We've only written the code so far and want to get your opinion on
that. If you like the idea, we'll also provide tests and docs. Any
suggestions are welcome.


Benchmarking.
-------------

We prepared a simple test case here to show performance improvement.
Download the attached script pg_stat_statements_prepare.sql and run
the following in psql.

CREATE EXTENSION pg_stat_statements;
-- Fill in pg_stat_statements statistics
\i /path/to/pgpro_stats_prepare_script.sql
-- Get random query ID
SELECT queryid AS rand_queryid
FROM pg_stat_statements
WHERE queryid IS NOT NULL
ORDER BY random()
LIMIT 1 \gset
-- Turn on time measuring
\timing
-- Get statistics in the old way
SELECT * FROM pg_stat_statements WHERE queryid = :rand_queryid;
-- Get statistics in the new way
SELECT * FROM pg_stat_statements(true, queryid => :rand_queryid);

I'm getting that the new way is at least two times faster on my
machine. I also compared the time for the old way on master with
and without the patch. I get that the difference is within standard
deviation.


Best regards,
Karina Litskevich
Postgres Professional: http://postgrespro.com/
From 1e4f15b9adbfeeef68a3810b1a325dc656549e16 Mon Sep 17 00:00:00 2001
From: Karina Litskevich <[email protected]>
Date: Wed, 3 Sep 2025 11:44:26 +0300
Subject: [PATCH v1] pg_stat_statements: add ability to filter statistics while
 sacnning pgss_hash

Author: Aleksandra Bondar
Author: Karina Litskevich
---
 .../pg_stat_statements--1.12--1.13.sql        |  3 +
 .../pg_stat_statements/pg_stat_statements.c   | 61 +++++++++++++++----
 2 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql b/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql
index 2f0eaf14ec3..613835f8c99 100644
--- a/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql
+++ b/contrib/pg_stat_statements/pg_stat_statements--1.12--1.13.sql
@@ -13,6 +13,9 @@ DROP FUNCTION pg_stat_statements(boolean);
 
 /* Now redefine */
 CREATE FUNCTION pg_stat_statements(IN showtext boolean,
+    IN userid oid DEFAULT 0,
+    IN dbid oid DEFAULT 0,
+    IN queryid bigint DEFAULT 0,
     OUT userid oid,
     OUT dbid oid,
     OUT toplevel bool,
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index 0bb0f933399..ba25a477b2c 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -148,6 +148,19 @@ typedef struct pgssHashKey
 	bool		toplevel;		/* query executed at top level */
 } pgssHashKey;
 
+/*
+ * Values to filter hashtable entries by. If a field is 0, this means no filtring
+ * is needed by this identifier.
+ */
+typedef struct pgssFilter
+{
+	Oid			userid;			/* user OID */
+	Oid			dbid;			/* database OID */
+	int64		queryid;		/* query identifier */
+} pgssFilter;
+
+#define NO_FILTERING (&(pgssFilter) {0})
+
 /*
  * The actual stats counters kept within pgssEntry.
  */
@@ -362,7 +375,8 @@ static void pgss_store(const char *query, int64 queryId,
 					   PlannedStmtOrigin planOrigin);
 static void pg_stat_statements_internal(FunctionCallInfo fcinfo,
 										pgssVersion api_version,
-										bool showtext);
+										bool showtext,
+										const pgssFilter *filter);
 static Size pgss_memsize(void);
 static pgssEntry *entry_alloc(pgssHashKey *key, Size query_offset, int query_len,
 							  int encoding, bool sticky);
@@ -1594,8 +1608,12 @@ Datum
 pg_stat_statements_1_13(PG_FUNCTION_ARGS)
 {
 	bool		showtext = PG_GETARG_BOOL(0);
+	Oid			userid = PG_GETARG_OID(1);
+	Oid			dbid = PG_GETARG_OID(2);
+	int64		queryid = PG_GETARG_INT64(3);
+	pgssFilter	filter = {userid, dbid, queryid};
 
-	pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext);
+	pg_stat_statements_internal(fcinfo, PGSS_V1_13, showtext, &filter);
 
 	return (Datum) 0;
 }
@@ -1605,7 +1623,7 @@ pg_stat_statements_1_12(PG_FUNCTION_ARGS)
 {
 	bool		showtext = PG_GETARG_BOOL(0);
 
-	pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext);
+	pg_stat_statements_internal(fcinfo, PGSS_V1_12, showtext, NO_FILTERING);
 
 	return (Datum) 0;
 }
@@ -1615,7 +1633,7 @@ pg_stat_statements_1_11(PG_FUNCTION_ARGS)
 {
 	bool		showtext = PG_GETARG_BOOL(0);
 
-	pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext);
+	pg_stat_statements_internal(fcinfo, PGSS_V1_11, showtext, NO_FILTERING);
 
 	return (Datum) 0;
 }
@@ -1625,7 +1643,7 @@ pg_stat_statements_1_10(PG_FUNCTION_ARGS)
 {
 	bool		showtext = PG_GETARG_BOOL(0);
 
-	pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext);
+	pg_stat_statements_internal(fcinfo, PGSS_V1_10, showtext, NO_FILTERING);
 
 	return (Datum) 0;
 }
@@ -1635,7 +1653,7 @@ pg_stat_statements_1_9(PG_FUNCTION_ARGS)
 {
 	bool		showtext = PG_GETARG_BOOL(0);
 
-	pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext);
+	pg_stat_statements_internal(fcinfo, PGSS_V1_9, showtext, NO_FILTERING);
 
 	return (Datum) 0;
 }
@@ -1645,7 +1663,7 @@ pg_stat_statements_1_8(PG_FUNCTION_ARGS)
 {
 	bool		showtext = PG_GETARG_BOOL(0);
 
-	pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext);
+	pg_stat_statements_internal(fcinfo, PGSS_V1_8, showtext, NO_FILTERING);
 
 	return (Datum) 0;
 }
@@ -1655,7 +1673,7 @@ pg_stat_statements_1_3(PG_FUNCTION_ARGS)
 {
 	bool		showtext = PG_GETARG_BOOL(0);
 
-	pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext);
+	pg_stat_statements_internal(fcinfo, PGSS_V1_3, showtext, NO_FILTERING);
 
 	return (Datum) 0;
 }
@@ -1665,7 +1683,7 @@ pg_stat_statements_1_2(PG_FUNCTION_ARGS)
 {
 	bool		showtext = PG_GETARG_BOOL(0);
 
-	pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext);
+	pg_stat_statements_internal(fcinfo, PGSS_V1_2, showtext, NO_FILTERING);
 
 	return (Datum) 0;
 }
@@ -1678,16 +1696,34 @@ Datum
 pg_stat_statements(PG_FUNCTION_ARGS)
 {
 	/* If it's really API 1.1, we'll figure that out below */
-	pg_stat_statements_internal(fcinfo, PGSS_V1_0, true);
+	pg_stat_statements_internal(fcinfo, PGSS_V1_0, true, NO_FILTERING);
 
 	return (Datum) 0;
 }
 
+static inline bool
+entry_matches_filter(const pgssEntry *entry, const pgssFilter *filter)
+{
+	Assert(filter != NULL);
+
+	if (filter->queryid != 0 && entry->key.queryid != filter->queryid)
+		return false;
+
+	if (filter->userid != 0 && entry->key.userid != filter->userid)
+		return false;
+
+	if (filter->dbid != 0 && entry->key.dbid != filter->dbid)
+		return false;
+
+	return true;
+}
+
 /* Common code for all versions of pg_stat_statements() */
 static void
 pg_stat_statements_internal(FunctionCallInfo fcinfo,
 							pgssVersion api_version,
-							bool showtext)
+							bool showtext,
+							const pgssFilter *filter)
 {
 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
 	Oid			userid = GetUserId();
@@ -1838,6 +1874,9 @@ pg_stat_statements_internal(FunctionCallInfo fcinfo,
 		TimestampTz stats_since;
 		TimestampTz minmax_stats_since;
 
+		if (!entry_matches_filter(entry, filter))
+			continue;
+
 		memset(values, 0, sizeof(values));
 		memset(nulls, 0, sizeof(nulls));
 
-- 
2.34.1

Attachment: pg_stat_statements_prepare.sql
Description: application/sql

Reply via email to