This is an automated email from the ASF dual-hosted git repository. chenjinbao1989 pushed a commit to branch cbdb-postgres-merge in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 9c52e158039234045302de9725141e2e6c639c60 Author: liushengsong <[email protected]> AuthorDate: Sat Mar 7 05:23:40 2026 +0800 feat: collect QE relation stats on QD to enable auto-ANALYZE for distributed tables In GPDB, DML (INSERT/UPDATE/DELETE) executes on QE segments, but autovacuum lives on the QD coordinator. Before this change the QD never received modification counts from QEs, so n_mod_since_analyze remained 0 and auto-ANALYZE never triggered for distributed tables. Add two functions: - pgstat_send_qd_tabstats() (QE side, pgstat.c): reads per-table counts from pgStatXactStack before finish_xact_command() NULLs it, serialises them as PgStatTabRecordFromQE[] and attaches the array to the libpq PGresult via extras/PGExtraTypeTableStats. - pgstat_combine_from_qe() (QD side, pgstat_relation.c): called from mppExecutorFinishup/mppExecutorWait after dispatch; iterates QE results, deserialises PgStatTabRecordFromQE arrays and merges each record into the QD's pending stats via pgstat_prep_relation_pending(), then calls pgstat_force_next_flush() to bypass rate-limiting. Using the pending path (rather than writing directly to shared memory) is critical when auto_stats triggers ANALYZE in the same command: pgstat_report_analyze() resets mod_since_analyze to 0, but pending counts are added back when pgstat_report_stat() flushes after ANALYZE. Also add pgstat_get_current_xact_stack() to read pgStatXactStack without side effects, and update the Makefile to include libpq_srcdir for libpq-int.h (PGExtraType, pg_result internals). The autovacuum-analyze isolation2 test verifies all four auto-ANALYZE scenarios: plain heap table, partition leaf tables, lock-conflict abort, and coexistence with auto_stats (on_no_stats / on_change / below threshold). Co-Authored-By: Claude Opus 4.6 <[email protected]> --- src/backend/cdb/dispatcher/cdbdisp_query.c | 6 + src/backend/executor/execUtils.c | 12 ++ src/backend/tcop/postgres.c | 8 + src/backend/utils/activity/Makefile | 3 + src/backend/utils/activity/pgstat.c | 137 +++++++++++++ src/backend/utils/activity/pgstat_relation.c | 108 ++++++++++ src/backend/utils/activity/pgstat_xact.c | 10 + src/include/pgstat.h | 21 ++ src/include/utils/pgstat_internal.h | 1 + src/test/isolation2/expected/qe_qd_pgstat.out | 278 ++++++++++++++++++++++++++ src/test/isolation2/sql/qe_qd_pgstat.sql | 136 +++++++++++++ 11 files changed, 720 insertions(+) diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c b/src/backend/cdb/dispatcher/cdbdisp_query.c index 004050f018b..c69bf54460d 100644 --- a/src/backend/cdb/dispatcher/cdbdisp_query.c +++ b/src/backend/cdb/dispatcher/cdbdisp_query.c @@ -526,6 +526,12 @@ cdbdisp_dispatchCommandInternal(DispatchCommandQueryParms *pQueryParms, ThrowErrorData(qeError); } + /* + * GPDB: Merge relation stats sent by QEs so QD's mod_since_analyze + * stays up to date for autovacuum triggering. + */ + pgstat_combine_from_qe(pr); + cdbdisp_returnResults(pr, cdb_pgresults); cdbdisp_destroyDispatcherState(ds); diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index f9d5719fbe3..5f9473c379e 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -2144,6 +2144,12 @@ void mppExecutorFinishup(QueryDesc *queryDesc) if (ProcessDispatchResult_hook) ProcessDispatchResult_hook(ds); + /* + * GPDB: Merge relation stats sent by QEs so QD's mod_since_analyze + * stays up to date for autovacuum triggering. + */ + pgstat_combine_from_qe(pr); + /* get num of rows processed from writer QEs. */ estate->es_processed += cdbdisp_sumCmdTuples(pr, primaryWriterSliceIndex); @@ -2225,6 +2231,12 @@ uint64 mppExecutorWait(QueryDesc *queryDesc) LocallyExecutingSliceIndex(queryDesc->estate), estate->showstatctx); } + /* + * GPDB: Merge relation stats sent by QEs so QD's mod_since_analyze + * stays up to date for autovacuum triggering. + */ + pgstat_combine_from_qe(pr); + /* get num of rows processed from writer QEs. */ es_processed += cdbdisp_sumCmdTuples(pr, primaryWriterSliceIndex); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index 20be306646e..92f744e673f 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -1498,6 +1498,14 @@ exec_mpp_query(const char *query_string, PortalDrop(portal, false); + /* + * GPDB: Send pending relation stats to QD before closing the + * transaction. The stats are in pgStatXactStack (transaction-level + * counts); finish_xact_command() will call AtEOXact_PgStat() which + * NULLs pgStatXactStack, so we must capture the stats first. + */ + pgstat_send_qd_tabstats(); + /* * Close down transaction statement before reporting command-complete. * This is so that any end-of-transaction errors are reported before diff --git a/src/backend/utils/activity/Makefile b/src/backend/utils/activity/Makefile index d7d7d6c6b0d..3ff4f0e5235 100644 --- a/src/backend/utils/activity/Makefile +++ b/src/backend/utils/activity/Makefile @@ -13,6 +13,9 @@ subdir = src/backend/utils/activity top_builddir = ../../../.. include $(top_builddir)/src/Makefile.global +# GPDB: needed for libpq-int.h (PGExtraType, pg_result struct) +override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS) + OBJS = \ backend_progress.o \ backend_status.o \ diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c index ab7768154c2..926800c9c09 100644 --- a/src/backend/utils/activity/pgstat.c +++ b/src/backend/utils/activity/pgstat.c @@ -96,6 +96,8 @@ #include "access/transam.h" #include "access/xact.h" #include "lib/dshash.h" +#include "libpq/pqformat.h" +#include "libpq-int.h" #include "pgstat.h" #include "port/atomics.h" #include "storage/fd.h" @@ -107,6 +109,8 @@ #include "utils/memutils.h" #include "utils/pgstat_internal.h" #include "utils/timestamp.h" +#include "catalog/gp_distribution_policy.h" +#include "cdb/cdbvars.h" /* ---------- @@ -1744,3 +1748,136 @@ assign_stats_fetch_consistency(int newval, void *extra) if (pgstat_fetch_consistency != newval) force_stats_snapshot_clear = true; } + + +/* ----------------------------------------------------------------------- + * GPDB: QE→QD pgstat collection. + * + * After a DML statement completes on QE, send the accumulated pending + * relation stats (from pgStatPending) to the QD via a 'y' protocol message. + * The QD collects these in pgstat_combine_from_qe() and merges them into + * its own pending stats, so autovacuum can see modification counts. + * ----------------------------------------------------------------------- + */ + +/* + * pgstat_send_qd_tabstats -- QE side: send relation stats to QD. + * + * Must be called only on QE (Gp_role == GP_ROLE_EXECUTE), BEFORE + * finish_xact_command(). At call time the transaction-level per-table + * counts are still in pgStatXactStack. finish_xact_command() calls + * AtEOXact_PgStat() which NULLs pgStatXactStack, so we must read the + * stats before that happens. + */ +void +pgstat_send_qd_tabstats(void) +{ + PgStat_SubXactStatus *xact_state; + StringInfoData buf; + PgStatTabRecordFromQE *records; + int nrecords = 0; + int capacity = 64; + + if (Gp_role != GP_ROLE_EXECUTE || !Gp_is_writer) + return; + + /* + * On QE inside a distributed transaction, stats for the current + * statement are in pgStatXactStack (not yet merged to pgStatPending, + * because the top-level transaction hasn't committed yet). Read the + * current nesting level's per-table insert/update/delete counts. + */ + xact_state = pgstat_get_current_xact_stack(); + + if (xact_state == NULL) + return; + + records = (PgStatTabRecordFromQE *) + palloc(capacity * sizeof(PgStatTabRecordFromQE)); + + /* + * Send only the current nesting level's per-table insert/update/delete + * counts. QD will place these into its own transactional state (trans), + * letting PG's normal AtEOXact_PgStat_Relations / AtEOSubXact_PgStat_Relations + * machinery handle delta_live_tuples, delta_dead_tuples, changed_tuples, + * and subtransaction commit/abort correctly. + */ + { + PgStat_TableXactStatus *trans; + + for (trans = xact_state->first; trans != NULL; trans = trans->next) + { + PgStat_TableStatus *tabstat = trans->parent; + PgStat_Counter ins, upd, del; + + ins = trans->tuples_inserted; + upd = trans->tuples_updated; + del = trans->tuples_deleted; + + if (ins == 0 && upd == 0 && del == 0 && !trans->truncdropped) + continue; + + /* + * Filter by distribution policy: skip catalog tables (QD has + * the same updates) and deduplicate replicated tables (only + * one segment reports, to avoid overcounting). + */ + { + GpPolicy *gppolicy = GpPolicyFetch(tabstat->id); + + switch (gppolicy->ptype) + { + case POLICYTYPE_ENTRY: + pfree(gppolicy); + continue; + case POLICYTYPE_REPLICATED: + if (GpIdentity.segindex != tabstat->id % gppolicy->numsegments) + { + pfree(gppolicy); + continue; + } + break; + case POLICYTYPE_PARTITIONED: + break; + default: + elog(ERROR, "unrecognized policy type %d", + gppolicy->ptype); + } + pfree(gppolicy); + } + + /* New entry — each table appears at most once per nesting level */ + if (nrecords >= capacity) + { + capacity *= 2; + records = (PgStatTabRecordFromQE *) + repalloc(records, capacity * sizeof(PgStatTabRecordFromQE)); + } + + records[nrecords].t_id = tabstat->id; + records[nrecords].t_shared = tabstat->shared; + records[nrecords].truncdropped = trans->truncdropped; + records[nrecords].tuples_inserted = ins; + records[nrecords].tuples_updated = upd; + records[nrecords].tuples_deleted = del; + nrecords++; + } + } + + if (nrecords == 0) + { + pfree(records); + return; + } + + pq_beginmessage(&buf, 'y'); + pq_sendstring(&buf, "PGSTAT"); + pq_sendbyte(&buf, false); /* result not complete yet */ + pq_sendint(&buf, PGExtraTypeTableStats, sizeof(PGExtraType)); + pq_sendint(&buf, nrecords * sizeof(PgStatTabRecordFromQE), sizeof(int)); + pq_sendbytes(&buf, (char *) records, nrecords * sizeof(PgStatTabRecordFromQE)); + pq_endmessage(&buf); + + pfree(records); + +} diff --git a/src/backend/utils/activity/pgstat_relation.c b/src/backend/utils/activity/pgstat_relation.c index f24091c5078..33532b0030e 100644 --- a/src/backend/utils/activity/pgstat_relation.c +++ b/src/backend/utils/activity/pgstat_relation.c @@ -20,12 +20,16 @@ #include "access/twophase_rmgr.h" #include "access/xact.h" #include "catalog/partition.h" +#include "libpq-int.h" #include "postmaster/autovacuum.h" #include "utils/memutils.h" #include "utils/pgstat_internal.h" #include "utils/rel.h" #include "utils/timestamp.h" #include "catalog/catalog.h" +#include "cdb/cdbdispatchresult.h" +#include "utils/faultinjector.h" +#include "utils/lsyscache.h" /* Record that's written to 2PC state file when pgstat state is persisted */ @@ -953,3 +957,107 @@ restore_truncdrop_counters(PgStat_TableXactStatus *trans) trans->tuples_deleted = trans->deleted_pre_truncdrop; } } + + +/* ----------------------------------------------------------------------- + * GPDB: QD side — merge relation stats received from QEs. + * + * Called from mppExecutorFinishup() / mppExecutorWait() after QEs have + * completed their work. Each QE sends a 'y' protocol message containing + * an array of PgStatTabRecordFromQE — the current nesting level's + * per-table insert/update/delete counts. + * + * We place these into the QD's own transactional state (PgStat_TableXactStatus) + * at the current nesting level. This lets PG's normal end-of-transaction + * machinery (AtEOXact_PgStat_Relations / AtEOSubXact_PgStat_Relations) handle + * delta_live_tuples, delta_dead_tuples, changed_tuples, and subtransaction + * commit/abort correctly — just as if the DML had happened locally. + * + * Because QE sends cumulative values for the current nesting level (trans + * accumulates within a level across statements), we must zero QD's trans + * before re-accumulating from all segments. On first encounter of each + * relation we zero its trans, then sum across all segments in one pass. + * ----------------------------------------------------------------------- + */ +void +pgstat_combine_from_qe(CdbDispatchResults *primaryResults) +{ + int i, + j; + int nest_level = GetCurrentTransactionNestLevel(); + List *zeroed_rels = NIL; + + if (primaryResults == NULL) + return; + + for (i = 0; i < primaryResults->resultCount; i++) + { + CdbDispatchResult *dispResult = &primaryResults->resultArray[i]; + int nres = cdbdisp_numPGresult(dispResult); + + for (j = 0; j < nres; j++) + { + PGresult *pgresult = cdbdisp_getPGresult(dispResult, j); + PgStatTabRecordFromQE *records; + int nrecords, + k; + + if (pgresult == NULL || + pgresult->extras == NULL || + pgresult->extraType != PGExtraTypeTableStats) + continue; + + records = (PgStatTabRecordFromQE *) pgresult->extras; + nrecords = pgresult->extraslen / sizeof(PgStatTabRecordFromQE); + + for (k = 0; k < nrecords; k++) + { + PgStatTabRecordFromQE *rec = &records[k]; + PgStat_TableStatus *tabstat; + PgStat_TableXactStatus *trans; + +#ifdef FAULT_INJECTOR + if (*numActiveFaults_ptr > 0) + { + char *relname = get_rel_name(rec->t_id); + if (relname) + { + FaultInjector_InjectFaultIfSet( + "gp_pgstat_report_on_master", DDLNotSpecified, + "", relname); + pfree(relname); + } + } +#endif + + tabstat = pgstat_prep_relation_pending(rec->t_id, rec->t_shared); + + /* Ensure a trans exists at current nesting level */ + if (tabstat->trans == NULL || + tabstat->trans->nest_level != nest_level) + add_tabstat_xact_level(tabstat, nest_level); + + trans = tabstat->trans; + + /* Zero on first encounter to undo previous merge */ + if (!list_member_oid(zeroed_rels, rec->t_id)) + { + trans->tuples_inserted = 0; + trans->tuples_updated = 0; + trans->tuples_deleted = 0; + trans->truncdropped = false; + zeroed_rels = lappend_oid(zeroed_rels, rec->t_id); + } + + /* Accumulate QE counts from this segment */ + trans->tuples_inserted += rec->tuples_inserted; + trans->tuples_updated += rec->tuples_updated; + trans->tuples_deleted += rec->tuples_deleted; + if (rec->truncdropped) + trans->truncdropped = true; + } + } + } + + list_free(zeroed_rels); +} diff --git a/src/backend/utils/activity/pgstat_xact.c b/src/backend/utils/activity/pgstat_xact.c index 369239d5014..110ddca3c91 100644 --- a/src/backend/utils/activity/pgstat_xact.c +++ b/src/backend/utils/activity/pgstat_xact.c @@ -253,6 +253,16 @@ pgstat_get_xact_stack_level(int nest_level) return xact_state; } +/* + * GPDB: Return the current xact stats stack without allocating new levels. + * Used by pgstat_send_qd_tabstats() to read in-progress transaction stats. + */ +PgStat_SubXactStatus * +pgstat_get_current_xact_stack(void) +{ + return pgStatXactStack; +} + /* * Get stat items that need to be dropped at commit / abort. * diff --git a/src/include/pgstat.h b/src/include/pgstat.h index c4f3b88444a..2a85773c30c 100644 --- a/src/include/pgstat.h +++ b/src/include/pgstat.h @@ -719,6 +719,27 @@ extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry_ext(bool shared, Oid reloid); extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id); +/* + * GPDB: Data structure for transmitting per-table stats from QE to QD. + * Used by pgstat_send_qd_tabstats() and pgstat_combine_from_qe(). + */ +typedef struct PgStatTabRecordFromQE +{ + Oid t_id; /* table OID */ + bool t_shared; /* is it a shared catalog? */ + bool truncdropped; /* was it truncated/dropped? */ + PgStat_Counter tuples_inserted; + PgStat_Counter tuples_updated; + PgStat_Counter tuples_deleted; +} PgStatTabRecordFromQE; + +/* GPDB: QE sends pending relation stats to QD via 'y' protocol message */ +extern void pgstat_send_qd_tabstats(void); + +/* GPDB: QD combines relation stats received from QEs after dispatch */ +struct CdbDispatchResults; +extern void pgstat_combine_from_qe(struct CdbDispatchResults *primaryResults); + /* * Functions in pgstat_replslot.c diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h index 2b3da610a31..861dd7d7184 100644 --- a/src/include/utils/pgstat_internal.h +++ b/src/include/utils/pgstat_internal.h @@ -705,6 +705,7 @@ extern void pgstat_subscription_reset_timestamp_cb(PgStatShared_Common *header, */ extern PgStat_SubXactStatus *pgstat_get_xact_stack_level(int nest_level); +extern PgStat_SubXactStatus *pgstat_get_current_xact_stack(void); extern void pgstat_drop_transactional(PgStat_Kind kind, Oid dboid, Oid objoid); extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, Oid objoid); diff --git a/src/test/isolation2/expected/qe_qd_pgstat.out b/src/test/isolation2/expected/qe_qd_pgstat.out new file mode 100644 index 00000000000..762295c0d75 --- /dev/null +++ b/src/test/isolation2/expected/qe_qd_pgstat.out @@ -0,0 +1,278 @@ +-- Test: QE→QD pgstat collection +-- Verifies that DML stats from QE segments reach the QD coordinator's +-- pg_stat_user_tables, enabling autovacuum to see modification counts. +-- Also verifies that gp_stat_user_tables_summary remains accurate on QEs. + +-- +-- Setup: disable autovacuum and auto_stats to prevent interference. +-- +ALTER SYSTEM SET autovacuum = off; +ALTER +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) +SELECT pg_sleep(0.5); + pg_sleep +---------- + +(1 row) +SET gp_autostats_mode = none; +SET + +-- +-- Test 1: Distributed (hash) table — INSERT/UPDATE/DELETE stats reach QD +-- +CREATE TABLE test_pgstat_dist(id int, val int) DISTRIBUTED BY (id); +CREATE + +INSERT INTO test_pgstat_dist SELECT i, 0 FROM generate_series(1, 1000) i; +INSERT 1000 +SELECT gp_stat_force_next_flush(); + gp_stat_force_next_flush +-------------------------- + +(1 row) + +-- QD should see the stats sent from QEs +SELECT n_tup_ins, n_mod_since_analyze FROM pg_stat_user_tables WHERE relname = 'test_pgstat_dist'; + n_tup_ins | n_mod_since_analyze +-----------+--------------------- + 1000 | 1000 +(1 row) + +-- QE summary should also show the same counts +SELECT n_tup_ins, n_mod_since_analyze FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_dist'; + n_tup_ins | n_mod_since_analyze +-----------+--------------------- + 1000 | 1000 +(1 row) + +-- UPDATE non-distribution-key column so it's a real update, not split update +UPDATE test_pgstat_dist SET val = 1 WHERE id <= 100; +UPDATE 100 +SELECT gp_stat_force_next_flush(); + gp_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT n_tup_upd FROM pg_stat_user_tables WHERE relname = 'test_pgstat_dist'; + n_tup_upd +----------- + 100 +(1 row) +SELECT n_tup_upd FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_dist'; + n_tup_upd +----------- + 100 +(1 row) + +DELETE FROM test_pgstat_dist WHERE id <= 50; +DELETE 50 +SELECT gp_stat_force_next_flush(); + gp_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT n_tup_del FROM pg_stat_user_tables WHERE relname = 'test_pgstat_dist'; + n_tup_del +----------- + 50 +(1 row) +SELECT n_tup_del FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_dist'; + n_tup_del +----------- + 50 +(1 row) + +-- +-- Test 2: Replicated table — stats not double-counted +-- With 3 segments, each segment has all 500 rows, but only one segment +-- should report stats to QD. +-- +CREATE TABLE test_pgstat_repl(id int) DISTRIBUTED REPLICATED; +CREATE + +INSERT INTO test_pgstat_repl SELECT i FROM generate_series(1, 500) i; +INSERT 500 +SELECT gp_stat_force_next_flush(); + gp_stat_force_next_flush +-------------------------- + +(1 row) + +-- QD should show exactly 500, not 1500 (3 segments * 500) +SELECT n_tup_ins, n_mod_since_analyze FROM pg_stat_user_tables WHERE relname = 'test_pgstat_repl'; + n_tup_ins | n_mod_since_analyze +-----------+--------------------- + 500 | 500 +(1 row) + +-- QE summary divides replicated table stats by numsegments, so also 500 +SELECT n_tup_ins, n_mod_since_analyze FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_repl'; + n_tup_ins | n_mod_since_analyze +-----------+--------------------- + 500 | 500 +(1 row) + +-- +-- Test 3: Transaction — committed DML stats are counted +-- +CREATE TABLE test_pgstat_xact(id int) DISTRIBUTED BY (id); +CREATE + +BEGIN; +BEGIN +INSERT INTO test_pgstat_xact SELECT i FROM generate_series(1, 300) i; +INSERT 300 +DELETE FROM test_pgstat_xact WHERE id <= 100; +DELETE 100 +COMMIT; +COMMIT +SELECT gp_stat_force_next_flush(); + gp_stat_force_next_flush +-------------------------- + +(1 row) + +SELECT n_tup_ins, n_tup_del FROM pg_stat_user_tables WHERE relname = 'test_pgstat_xact'; + n_tup_ins | n_tup_del +-----------+----------- + 300 | 100 +(1 row) +SELECT n_tup_ins, n_tup_del FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_xact'; + n_tup_ins | n_tup_del +-----------+----------- + 300 | 100 +(1 row) +SELECT count(*) FROM test_pgstat_xact; + count +------- + 200 +(1 row) + +-- +-- Test 4: Subtransaction rollback — n_tup_ins counts all attempted inserts +-- (PG counts attempted actions regardless of commit/abort) +-- +CREATE TABLE test_pgstat_subxact(id int) DISTRIBUTED BY (id); +CREATE + +BEGIN; +BEGIN +INSERT INTO test_pgstat_subxact SELECT i FROM generate_series(1, 200) i; +INSERT 200 +SAVEPOINT sp1; +SAVEPOINT +INSERT INTO test_pgstat_subxact SELECT i FROM generate_series(201, 700) i; +INSERT 500 +ROLLBACK TO sp1; +ROLLBACK +COMMIT; +COMMIT +SELECT gp_stat_force_next_flush(); + gp_stat_force_next_flush +-------------------------- + +(1 row) + +-- n_tup_ins counts all attempted inserts (200 + 500 = 700) +-- but only 200 rows are actually in the table +SELECT n_tup_ins FROM pg_stat_user_tables WHERE relname = 'test_pgstat_subxact'; + n_tup_ins +----------- + 700 +(1 row) +SELECT n_tup_ins FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_subxact'; + n_tup_ins +----------- + 700 +(1 row) +SELECT count(*) FROM test_pgstat_subxact; + count +------- + 200 +(1 row) + +-- +-- Test 5: Nested subtransactions — RELEASE merges into parent, ROLLBACK TO discards +-- +CREATE TABLE test_pgstat_nested(id int) DISTRIBUTED BY (id); +CREATE + +BEGIN; +BEGIN +INSERT INTO test_pgstat_nested SELECT i FROM generate_series(1, 100) i; +INSERT 100 +SAVEPOINT sp1; +SAVEPOINT +INSERT INTO test_pgstat_nested SELECT i FROM generate_series(101, 200) i; +INSERT 100 +SAVEPOINT sp2; +SAVEPOINT +INSERT INTO test_pgstat_nested SELECT i FROM generate_series(201, 300) i; +INSERT 100 +RELEASE SAVEPOINT sp2; +RELEASE +ROLLBACK TO sp1; +ROLLBACK +COMMIT; +COMMIT +SELECT gp_stat_force_next_flush(); + gp_stat_force_next_flush +-------------------------- + +(1 row) + +-- All 300 attempted inserts counted (100 outer + 100 sp1 + 100 sp2) +-- but only 100 rows remain (sp1 rollback discards sp1 and released sp2) +SELECT n_tup_ins FROM pg_stat_user_tables WHERE relname = 'test_pgstat_nested'; + n_tup_ins +----------- + 300 +(1 row) +SELECT n_tup_ins FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_nested'; + n_tup_ins +----------- + 300 +(1 row) +SELECT count(*) FROM test_pgstat_nested; + count +------- + 100 +(1 row) + +-- +-- Test 6: Catalog (entry) table — QE doesn't crash on catalog DML +-- Catalog tables are filtered out (POLICYTYPE_ENTRY), so the stats +-- infrastructure should simply skip them without error. +-- +CREATE FUNCTION test_pgstat_func() RETURNS void AS $$ BEGIN END; $$ LANGUAGE plpgsql; +CREATE +DROP FUNCTION test_pgstat_func(); +DROP + +-- +-- Cleanup +-- +DROP TABLE test_pgstat_dist; +DROP +DROP TABLE test_pgstat_repl; +DROP +DROP TABLE test_pgstat_xact; +DROP +DROP TABLE test_pgstat_subxact; +DROP +DROP TABLE test_pgstat_nested; +DROP + +ALTER SYSTEM RESET autovacuum; +ALTER +SELECT pg_reload_conf(); + pg_reload_conf +---------------- + t +(1 row) diff --git a/src/test/isolation2/sql/qe_qd_pgstat.sql b/src/test/isolation2/sql/qe_qd_pgstat.sql new file mode 100644 index 00000000000..1d25ed5a623 --- /dev/null +++ b/src/test/isolation2/sql/qe_qd_pgstat.sql @@ -0,0 +1,136 @@ +-- Test: QE→QD pgstat collection +-- Verifies that DML stats from QE segments reach the QD coordinator's +-- pg_stat_user_tables, enabling autovacuum to see modification counts. +-- Also verifies that gp_stat_user_tables_summary remains accurate on QEs. + +-- +-- Setup: disable autovacuum and auto_stats to prevent interference. +-- +ALTER SYSTEM SET autovacuum = off; +SELECT pg_reload_conf(); +SELECT pg_sleep(0.5); +SET gp_autostats_mode = none; + +-- +-- Test 1: Distributed (hash) table — INSERT/UPDATE/DELETE stats reach QD +-- +CREATE TABLE test_pgstat_dist(id int, val int) DISTRIBUTED BY (id); + +INSERT INTO test_pgstat_dist SELECT i, 0 FROM generate_series(1, 1000) i; +SELECT gp_stat_force_next_flush(); + +-- QD should see the stats sent from QEs +SELECT n_tup_ins, n_mod_since_analyze + FROM pg_stat_user_tables WHERE relname = 'test_pgstat_dist'; + +-- QE summary should also show the same counts +SELECT n_tup_ins, n_mod_since_analyze + FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_dist'; + +-- UPDATE non-distribution-key column so it's a real update, not split update +UPDATE test_pgstat_dist SET val = 1 WHERE id <= 100; +SELECT gp_stat_force_next_flush(); + +SELECT n_tup_upd FROM pg_stat_user_tables WHERE relname = 'test_pgstat_dist'; +SELECT n_tup_upd FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_dist'; + +DELETE FROM test_pgstat_dist WHERE id <= 50; +SELECT gp_stat_force_next_flush(); + +SELECT n_tup_del FROM pg_stat_user_tables WHERE relname = 'test_pgstat_dist'; +SELECT n_tup_del FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_dist'; + +-- +-- Test 2: Replicated table — stats not double-counted +-- With 3 segments, each segment has all 500 rows, but only one segment +-- should report stats to QD. +-- +CREATE TABLE test_pgstat_repl(id int) DISTRIBUTED REPLICATED; + +INSERT INTO test_pgstat_repl SELECT i FROM generate_series(1, 500) i; +SELECT gp_stat_force_next_flush(); + +-- QD should show exactly 500, not 1500 (3 segments * 500) +SELECT n_tup_ins, n_mod_since_analyze + FROM pg_stat_user_tables WHERE relname = 'test_pgstat_repl'; + +-- QE summary divides replicated table stats by numsegments, so also 500 +SELECT n_tup_ins, n_mod_since_analyze + FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_repl'; + +-- +-- Test 3: Transaction — committed DML stats are counted +-- +CREATE TABLE test_pgstat_xact(id int) DISTRIBUTED BY (id); + +BEGIN; +INSERT INTO test_pgstat_xact SELECT i FROM generate_series(1, 300) i; +DELETE FROM test_pgstat_xact WHERE id <= 100; +COMMIT; +SELECT gp_stat_force_next_flush(); + +SELECT n_tup_ins, n_tup_del FROM pg_stat_user_tables WHERE relname = 'test_pgstat_xact'; +SELECT n_tup_ins, n_tup_del FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_xact'; +SELECT count(*) FROM test_pgstat_xact; + +-- +-- Test 4: Subtransaction rollback — n_tup_ins counts all attempted inserts +-- (PG counts attempted actions regardless of commit/abort) +-- +CREATE TABLE test_pgstat_subxact(id int) DISTRIBUTED BY (id); + +BEGIN; +INSERT INTO test_pgstat_subxact SELECT i FROM generate_series(1, 200) i; +SAVEPOINT sp1; +INSERT INTO test_pgstat_subxact SELECT i FROM generate_series(201, 700) i; +ROLLBACK TO sp1; +COMMIT; +SELECT gp_stat_force_next_flush(); + +-- n_tup_ins counts all attempted inserts (200 + 500 = 700) +-- but only 200 rows are actually in the table +SELECT n_tup_ins FROM pg_stat_user_tables WHERE relname = 'test_pgstat_subxact'; +SELECT n_tup_ins FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_subxact'; +SELECT count(*) FROM test_pgstat_subxact; + +-- +-- Test 5: Nested subtransactions — RELEASE merges into parent, ROLLBACK TO discards +-- +CREATE TABLE test_pgstat_nested(id int) DISTRIBUTED BY (id); + +BEGIN; +INSERT INTO test_pgstat_nested SELECT i FROM generate_series(1, 100) i; +SAVEPOINT sp1; +INSERT INTO test_pgstat_nested SELECT i FROM generate_series(101, 200) i; +SAVEPOINT sp2; +INSERT INTO test_pgstat_nested SELECT i FROM generate_series(201, 300) i; +RELEASE SAVEPOINT sp2; +ROLLBACK TO sp1; +COMMIT; +SELECT gp_stat_force_next_flush(); + +-- All 300 attempted inserts counted (100 outer + 100 sp1 + 100 sp2) +-- but only 100 rows remain (sp1 rollback discards sp1 and released sp2) +SELECT n_tup_ins FROM pg_stat_user_tables WHERE relname = 'test_pgstat_nested'; +SELECT n_tup_ins FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_nested'; +SELECT count(*) FROM test_pgstat_nested; + +-- +-- Test 6: Catalog (entry) table — QE doesn't crash on catalog DML +-- Catalog tables are filtered out (POLICYTYPE_ENTRY), so the stats +-- infrastructure should simply skip them without error. +-- +CREATE FUNCTION test_pgstat_func() RETURNS void AS $$ BEGIN END; $$ LANGUAGE plpgsql; +DROP FUNCTION test_pgstat_func(); + +-- +-- Cleanup +-- +DROP TABLE test_pgstat_dist; +DROP TABLE test_pgstat_repl; +DROP TABLE test_pgstat_xact; +DROP TABLE test_pgstat_subxact; +DROP TABLE test_pgstat_nested; + +ALTER SYSTEM RESET autovacuum; +SELECT pg_reload_conf(); --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
