Hi! Thank you for the work with this subject.
I looked at your patch and noticed that this part of the code is
repeated several times:
if (nesting_level == 0)
{
if (!IsParallelWorker())
current_query_sampled =
pg_prng_double(&pg_global_prng_state) < pgss_sample_rate;
else
current_query_sampled = false;
}
I think you should put this in a function like
update_current_query_sampled. I've attached a diff file with the changes.
--
Regards,
Alena Rybakina
Postgres Professional
diff --git a/contrib/pg_stat_statements/expected/select.out b/contrib/pg_stat_statements/expected/select.out
index 37a30af034a..558d93fb468 100644
--- a/contrib/pg_stat_statements/expected/select.out
+++ b/contrib/pg_stat_statements/expected/select.out
@@ -153,6 +153,82 @@ SELECT pg_stat_statements_reset() IS NOT NULL AS t;
t
(1 row)
+--
+-- sample statements
+--
+SET pg_stat_statements.sample_rate = 0.0;
+SELECT 1 AS "int";
+ int
+-----
+ 1
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+----------------------------------------------------+-------
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+(1 row)
+
+SET pg_stat_statements.sample_rate = 1.0;
+SELECT 1 AS "int";
+ int
+-----
+ 1
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT $1 AS "int" | 1
+ SELECT pg_stat_statements_reset() IS NOT NULL AS t | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(3 rows)
+
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+set pg_stat_statements.sample_rate = 0.0;
+select pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
+SELECT 1 \parse stmt
+\bind_named stmt \g
+ ?column?
+----------
+ 1
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+-------+-------
+(0 rows)
+
+set pg_stat_statements.sample_rate = 1.0;
+\bind_named stmt \g
+ ?column?
+----------
+ 1
+(1 row)
+
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+ query | calls
+------------------------------------------------------------------------+-------
+ SELECT $1 | 1
+ SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C" | 0
+(2 rows)
+
+select pg_stat_statements_reset() IS NOT NULL AS t;
+ t
+---
+ t
+(1 row)
+
--
-- queries with locking clauses
--
diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
index bebf8134eb0..73795458364 100644
--- a/contrib/pg_stat_statements/pg_stat_statements.c
+++ b/contrib/pg_stat_statements/pg_stat_statements.c
@@ -50,6 +50,7 @@
#include "access/parallel.h"
#include "catalog/pg_authid.h"
#include "common/int.h"
+#include "common/pg_prng.h"
#include "executor/instrument.h"
#include "funcapi.h"
#include "jit/jit.h"
@@ -294,6 +295,10 @@ static bool pgss_track_utility = true; /* whether to track utility commands */
static bool pgss_track_planning = false; /* whether to track planning
* duration */
static bool pgss_save = true; /* whether to save stats across shutdown */
+static double pgss_sample_rate = 1;
+
+/* Is the current top-level query to be sampled? */
+static bool current_query_sampled = false;
#define pgss_enabled(level) \
@@ -414,6 +419,19 @@ _PG_init(void)
NULL,
NULL);
+ DefineCustomRealVariable("pg_stat_statements.sample_rate",
+ "Fraction of queries to track.",
+ NULL,
+ &pgss_sample_rate,
+ 1.0,
+ 0.0,
+ 1.0,
+ PGC_SUSET,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
DefineCustomEnumVariable("pg_stat_statements.track",
"Selects which statements are tracked by pg_stat_statements.",
NULL,
@@ -877,6 +895,18 @@ pgss_post_parse_analyze(ParseState *pstate, Query *query, JumbleState *jstate)
0);
}
+static void
+update_current_query_sampled()
+{
+ if (nesting_level == 0)
+ {
+ if (!IsParallelWorker())
+ current_query_sampled = pg_prng_double(&pg_global_prng_state) < pgss_sample_rate;
+ else
+ current_query_sampled = false;
+
+ }
+}
/*
* Planner hook: forward to regular planner, but measure planning time
* if needed.
@@ -889,12 +919,15 @@ pgss_planner(Query *parse,
{
PlannedStmt *result;
+ if (nesting_level == 0)
+ update_current_query_sampled();
+
/*
* We can't process the query if no query_string is provided, as
* pgss_store needs it. We also ignore query without queryid, as it would
* be treated as a utility statement, which may not be the case.
*/
- if (pgss_enabled(nesting_level)
+ if (pgss_enabled(nesting_level) && current_query_sampled
&& pgss_track_planning && query_string
&& parse->queryId != UINT64CONST(0))
{
@@ -994,12 +1027,16 @@ pgss_ExecutorStart(QueryDesc *queryDesc, int eflags)
else
standard_ExecutorStart(queryDesc, eflags);
+ if (nesting_level == 0)
+ update_current_query_sampled();
+
/*
* If query has queryId zero, don't track it. This prevents double
* counting of optimizable statements that are directly contained in
* utility statements.
*/
- if (pgss_enabled(nesting_level) && queryDesc->plannedstmt->queryId != UINT64CONST(0))
+ if (pgss_enabled(nesting_level) && current_query_sampled &&
+ queryDesc->plannedstmt->queryId != UINT64CONST(0))
{
/*
* Set up to track total elapsed time in ExecutorRun. Make sure the
@@ -1111,7 +1148,13 @@ pgss_ProcessUtility(PlannedStmt *pstmt, const char *queryString,
uint64 saved_queryId = pstmt->queryId;
int saved_stmt_location = pstmt->stmt_location;
int saved_stmt_len = pstmt->stmt_len;
- bool enabled = pgss_track_utility && pgss_enabled(nesting_level);
+ bool enabled;
+
+ if (nesting_level == 0)
+ update_current_query_sampled();
+
+ /* current_query_sampled is in pgss_enabled */
+ enabled = pgss_track_utility && pgss_enabled(nesting_level) && current_query_sampled;
/*
* Force utility statements to get queryId zero. We do this even in cases
diff --git a/contrib/pg_stat_statements/sql/select.sql b/contrib/pg_stat_statements/sql/select.sql
index e0be58d5e24..21f09ef94a6 100644
--- a/contrib/pg_stat_statements/sql/select.sql
+++ b/contrib/pg_stat_statements/sql/select.sql
@@ -59,6 +59,27 @@ DEALLOCATE pgss_test;
SELECT calls, rows, query FROM pg_stat_statements ORDER BY query COLLATE "C";
SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+--
+-- sample statements
+--
+SET pg_stat_statements.sample_rate = 0.0;
+SELECT 1 AS "int";
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+SET pg_stat_statements.sample_rate = 1.0;
+SELECT 1 AS "int";
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+SELECT pg_stat_statements_reset() IS NOT NULL AS t;
+
+set pg_stat_statements.sample_rate = 0.0;
+select pg_stat_statements_reset() IS NOT NULL AS t;
+SELECT 1 \parse stmt
+\bind_named stmt \g
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+set pg_stat_statements.sample_rate = 1.0;
+\bind_named stmt \g
+SELECT query, calls FROM pg_stat_statements ORDER BY query COLLATE "C";
+select pg_stat_statements_reset() IS NOT NULL AS t;
+
--
-- queries with locking clauses
--