Hi,
At Tue, 19 Sep 2017 16:55:38 -0700, Peter Geoghegan <[email protected]> wrote in
<cah2-wzn0-3zxgrp_qp1oaexy7h1w0-w_vcfo0ndv0k_+kab...@mail.gmail.com>
> On Tue, Sep 19, 2017 at 4:47 PM, Claudio Freire <[email protected]>
> wrote:
> > Maybe this is looking at the problem from the wrong direction.
> >
> > Why can't the page be added to the FSM immediately and the check be
> > done at runtime when looking for a reusable page?
> >
> > Index FSMs currently store only 0 or 255, couldn't they store 128 for
> > half-recyclable pages and make the caller re-check reusability before
> > using it?
>
> No, because it's impossible for them to know whether or not the page
> that their index scan just landed on recycled just a second ago, or
> was like this since before their xact began/snapshot was acquired.
>
> For your reference, this RecentGlobalXmin interlock stuff is what
> Lanin & Shasha call "The Drain Technique" within "2.5 Freeing Empty
> Nodes". Seems pretty hard to do it any other way.
Anyway(:p) the attached first patch is a PoC for the
cleanup-state-in-stats method, which works only for btree. Some
LOG-level debugging messages are put in the patch to show how it
works.
The following steps make a not-recyclable page, but I'm not sure
it is general enough, and I couldn't generate half-dead pages.
The pg_sleep() in the following steps is inserted in order to see
the updated values in stats.
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (a int);
CREATE INDEX ON t1 (a);
INSERT INTO t1 (SELECT a FROM generate_series(0, 800000) a);
DELETE FROM t1 WHERE a > 416700 AND a < 417250;
VACUUM t1;
DELETE FROM t1;
VACUUM t1; -- 1 (or wait for autovacuum)
select pg_sleep(1);
VACUUM t1; -- 2 (autovacuum doesn't work)
select pg_sleep(1);
VACUUM t1; -- 3 (ditto)
The following logs are emitted while the three VACUUMs are issued.
# VACUUM t1; -- 1 (or wait for autovacuum)
LOG: btvacuumscan(t1_a_idx) result: deleted = 2185, notrecyclable = 1,
hafldead = 0, no_cleanup_needed = false
LOG: Vacuum cleanup of index t1_a_idx is NOT skipped
LOG: btvacuumcleanup on index t1_a_idx is skipped since bulkdelete has run
just before.
# VACUUM t1; -- 2
LOG: Vacuum cleanup of index t1_a_idx is NOT skipped
LOG: btvacuumscan(t1_a_idx) result: deleted = 2192, notrecyclable = 0,
hafldead = 0, no_cleanup_needed = true
# VACUUM t1; -- 3
LOG: Vacuum cleanup of index t1_a_idx is skipped
VACUUM #1 leaves an unrecyclable page and requests the next cleanup.
VACUUM #2 leaves no unrecyclable page and inhibits the next cleanup.
For VACUUM #3 (and ever after), no vacuum cleanup is executed.
# I suppose it is a known issue that the cleanup cycles are not
# executed automatically unless new dead tuples are generated.
- During initdb, an attempt to get stats takes a very long time
before it fails. Since I couldn't find the right way to cope with
this, I added a tentative function pgstat_live(), which checks that
the backend has a valid stats socket.
- The patch calls pg_stat_get_vac_cleanup_needed using
DirectFunctionCall. It might be better to wrap that call.
As a byproduct, this enables us to run extra autovacuum rounds for
index cleanup. With the second attached patch, autovacuum works as
follows.
DROP TABLE IF EXISTS t1;
CREATE TABLE t1 (a int);
CREATE INDEX ON t1 (a);
INSERT INTO t1 (SELECT a FROM generate_series(0, 800000) a);
DELETE FROM t1 WHERE a > 416700 AND a < 417250;
(autovacuum on t1 runs)
> LOG: btvacuumscan(t1_a_idx) result: deleted = 0, notrecyclable = 0, hafldead
> = 0, no_cleanup_needed = true
> LOG: Vacuum cleanup of index t1_a_idx is skipped
> LOG: automatic vacuum of table "postgres.public.t1": index scans: 1
DELETE FROM t1;
(autovacuum on t1 runs)
> LOG: btvacuumscan(t1_a_idx) result: deleted = 2185, notrecyclable = 1,
> hafldead = 0, no_cleanup_needed = false
> LOG: Vacuum cleanup of index t1_a_idx is NOT skipped
> LOG: btvacuumcleanup on index t1_a_idx is skipped since bulkdelete has run
> just before.
> LOG: automatic vacuum of table "postgres.public.t1": index scans: 1
(cleanup vacuum runs for t1 in the next autovac timing)
> LOG: Vacuum cleanup of index t1_a_idx is NOT skipped
> LOG: btvacuumscan(t1_a_idx) result: deleted = 2192, notrecyclable = 0,
> hafldead = 0, no_cleanup_needed = true
> LOG: automatic vacuum of table "postgres.public.t1": index scans: 0
Any suggestions are welcome.
regards,
--
Kyotaro Horiguchi
NTT Open Source Software Center
*** a/src/backend/access/nbtree/nbtpage.c
--- b/src/backend/access/nbtree/nbtpage.c
***************
*** 1110,1116 **** _bt_pagedel(Relation rel, Buffer buf)
{
int ndeleted = 0;
BlockNumber rightsib;
! bool rightsib_empty;
Page page;
BTPageOpaque opaque;
--- 1110,1116 ----
{
int ndeleted = 0;
BlockNumber rightsib;
! bool rightsib_empty = false;
Page page;
BTPageOpaque opaque;
*** a/src/backend/access/nbtree/nbtree.c
--- b/src/backend/access/nbtree/nbtree.c
***************
*** 63,68 **** typedef struct
--- 63,70 ----
BlockNumber lastBlockLocked; /* highest blkno we've cleanup-locked */
BlockNumber totFreePages; /* true total # of free pages */
MemoryContext pagedelcontext;
+ uint32 pages_notrecyclable; /* # of not-yet-recyclable pages */
+ uint32 pages_halfdead; /* # of half-dead pages */
} BTVacState;
/*
***************
*** 945,950 **** btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
--- 947,954 ----
IndexBulkDeleteResult *
btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
{
+ extern char *get_rel_name(Oid);
+
/* No-op in ANALYZE ONLY mode */
if (info->analyze_only)
return stats;
***************
*** 963,968 **** btvacuumcleanup(IndexVacuumInfo *info, IndexBulkDeleteResult *stats)
--- 967,977 ----
stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult));
btvacuumscan(info, stats, NULL, NULL, 0);
}
+ else
+ ereport(LOG,
+ (errmsg ("btvacuumcleanup on index %s is skipped since bulkdelete has run just before.",
+ get_rel_name(info->index->rd_id)),
+ errhidestmt (true)));
/* Finally, vacuum the FSM */
IndexFreeSpaceMapVacuum(info->index);
***************
*** 1004,1009 **** btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
--- 1013,1019 ----
BlockNumber num_pages;
BlockNumber blkno;
bool needLock;
+ extern char *get_rel_name(Oid);
/*
* Reset counts that will be incremented during the scan; needed in case
***************
*** 1022,1027 **** btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
--- 1032,1039 ----
vstate.lastBlockVacuumed = BTREE_METAPAGE; /* Initialise at first block */
vstate.lastBlockLocked = BTREE_METAPAGE;
vstate.totFreePages = 0;
+ vstate.pages_notrecyclable = 0;
+ vstate.pages_halfdead = 0;
/* Create a temporary memory context to run _bt_pagedel in */
vstate.pagedelcontext = AllocSetContextCreate(CurrentMemoryContext,
***************
*** 1111,1116 **** btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
--- 1123,1139 ----
/* update statistics */
stats->num_pages = num_pages;
stats->pages_free = vstate.totFreePages;
+
+ /* check if we need no further clenaup */
+ if (vstate.pages_notrecyclable == 0 && vstate.pages_halfdead == 0)
+ stats->no_cleanup_needed = true;
+
+ ereport(LOG,
+ (errmsg ("btvacuumscan(%s) result: deleted = %d, notrecyclable = %d, hafldead = %d, no_cleanup_needed = %s",
+ get_rel_name(rel->rd_id), stats->pages_deleted,
+ vstate.pages_notrecyclable, vstate.pages_halfdead,
+ stats->no_cleanup_needed ? "true":"false"),
+ errhidestmt(true)));
}
/*
***************
*** 1190,1195 **** restart:
--- 1213,1219 ----
{
/* Already deleted, but can't recycle yet */
stats->pages_deleted++;
+ vstate->pages_notrecyclable++;
}
else if (P_ISHALFDEAD(opaque))
{
***************
*** 1359,1364 **** restart:
--- 1383,1390 ----
/* count only this page, else may double-count parent */
if (ndel)
stats->pages_deleted++;
+ else if (P_ISHALFDEAD(opaque))
+ vstate->pages_halfdead++; /* Still half-dead */
MemoryContextSwitchTo(oldcontext);
/* pagedel released buffer, so we shouldn't */
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 56,61 ****
--- 56,62 ----
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
+ #include "utils/fmgrprotos.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_rusage.h"
***************
*** 129,134 **** typedef struct LVRelStats
--- 130,137 ----
int num_index_scans;
TransactionId latestRemovedXid;
bool lock_waiter_detected;
+ int num_index_stats;
+ PgStat_MsgVacuum_indstate *indstats;
} LVRelStats;
***************
*** 152,158 **** static void lazy_vacuum_index(Relation indrel,
IndexBulkDeleteResult **stats,
LVRelStats *vacrelstats);
static void lazy_cleanup_index(Relation indrel,
! IndexBulkDeleteResult *stats,
LVRelStats *vacrelstats);
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
--- 155,161 ----
IndexBulkDeleteResult **stats,
LVRelStats *vacrelstats);
static void lazy_cleanup_index(Relation indrel,
! IndexBulkDeleteResult **stats,
LVRelStats *vacrelstats);
static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer,
int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer);
***************
*** 342,348 **** lazy_vacuum_rel(Relation onerel, int options, VacuumParams *params,
pgstat_report_vacuum(RelationGetRelid(onerel),
onerel->rd_rel->relisshared,
new_live_tuples,
! vacrelstats->new_dead_tuples);
pgstat_progress_end_command();
/* and log the action if appropriate */
--- 345,352 ----
pgstat_report_vacuum(RelationGetRelid(onerel),
onerel->rd_rel->relisshared,
new_live_tuples,
! vacrelstats->new_dead_tuples,
! vacrelstats->num_index_stats, vacrelstats->indstats);
pgstat_progress_end_command();
/* and log the action if appropriate */
***************
*** 496,501 **** lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
--- 500,508 ----
indstats = (IndexBulkDeleteResult **)
palloc0(nindexes * sizeof(IndexBulkDeleteResult *));
+ vacrelstats->num_index_stats = nindexes;
+ vacrelstats->indstats = (PgStat_MsgVacuum_indstate *)
+ palloc0(nindexes * MAXALIGN(sizeof(PgStat_MsgVacuum_indstate)));
nblocks = RelationGetNumberOfBlocks(onerel);
vacrelstats->rel_pages = nblocks;
***************
*** 1320,1326 **** lazy_scan_heap(Relation onerel, int options, LVRelStats *vacrelstats,
/* Do post-vacuum cleanup and statistics update for each index */
for (i = 0; i < nindexes; i++)
! lazy_cleanup_index(Irel[i], indstats[i], vacrelstats);
/* If no indexes, make log report that lazy_vacuum_heap would've made */
if (vacuumed_pages)
--- 1327,1344 ----
/* Do post-vacuum cleanup and statistics update for each index */
for (i = 0; i < nindexes; i++)
! {
! lazy_cleanup_index(Irel[i], &indstats[i], vacrelstats);
!
! /* update stats if indstats exists */
! if (indstats[i])
! {
! /* prepare to record the result to stats */
! vacrelstats->indstats[i].indexoid = Irel[i]->rd_id;
! vacrelstats->indstats[i].vac_cleanup_needed =
! !(indstats[i] && indstats[i]->no_cleanup_needed);
! }
! }
/* If no indexes, make log report that lazy_vacuum_heap would've made */
if (vacuumed_pages)
***************
*** 1622,1632 **** lazy_vacuum_index(Relation indrel,
*/
static void
lazy_cleanup_index(Relation indrel,
! IndexBulkDeleteResult *stats,
LVRelStats *vacrelstats)
{
IndexVacuumInfo ivinfo;
PGRUsage ru0;
pg_rusage_init(&ru0);
--- 1640,1652 ----
*/
static void
lazy_cleanup_index(Relation indrel,
! IndexBulkDeleteResult **stats,
LVRelStats *vacrelstats)
{
IndexVacuumInfo ivinfo;
PGRUsage ru0;
+ bool run_cleanup = true;
+ extern char *get_rel_name(Oid);
pg_rusage_init(&ru0);
***************
*** 1637,1655 **** lazy_cleanup_index(Relation indrel,
ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
ivinfo.strategy = vac_strategy;
! stats = index_vacuum_cleanup(&ivinfo, stats);
! if (!stats)
return;
/*
* Now update statistics in pg_class, but only if the index says the count
* is accurate.
*/
! if (!stats->estimated_count)
vac_update_relstats(indrel,
! stats->num_pages,
! stats->num_index_tuples,
0,
false,
InvalidTransactionId,
--- 1657,1696 ----
ivinfo.num_heap_tuples = vacrelstats->new_rel_tuples;
ivinfo.strategy = vac_strategy;
! /*
! * If lazy_vacuum_index tells me that no cleanup is required, or stats
! * tells so, skip cleanup.
! */
! if (*stats)
! {
! if ((*stats)->no_cleanup_needed)
! run_cleanup =false;
! }
! else
! run_cleanup = DatumGetBool(
! DirectFunctionCall1(pg_stat_get_vac_cleanup_needed,
! ObjectIdGetDatum(indrel->rd_id)));
! ereport(LOG,
! (errmsg ("Vacuum cleanup of index %s is %sskipped",
! get_rel_name(indrel->rd_id),
! run_cleanup ? "NOT ": ""),
! errhidestmt (true)));
!
! if (run_cleanup)
! *stats = index_vacuum_cleanup(&ivinfo, *stats);
!
! if (!*stats)
return;
/*
* Now update statistics in pg_class, but only if the index says the count
* is accurate.
*/
! if (!(*stats)->estimated_count)
vac_update_relstats(indrel,
! (*stats)->num_pages,
! (*stats)->num_index_tuples,
0,
false,
InvalidTransactionId,
***************
*** 1659,1674 **** lazy_cleanup_index(Relation indrel,
ereport(elevel,
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
RelationGetRelationName(indrel),
! stats->num_index_tuples,
! stats->num_pages),
errdetail("%.0f index row versions were removed.\n"
"%u index pages have been deleted, %u are currently reusable.\n"
"%s.",
! stats->tuples_removed,
! stats->pages_deleted, stats->pages_free,
pg_rusage_show(&ru0))));
-
- pfree(stats);
}
/*
--- 1700,1713 ----
ereport(elevel,
(errmsg("index \"%s\" now contains %.0f row versions in %u pages",
RelationGetRelationName(indrel),
! (*stats)->num_index_tuples,
! (*stats)->num_pages),
errdetail("%.0f index row versions were removed.\n"
"%u index pages have been deleted, %u are currently reusable.\n"
"%s.",
! (*stats)->tuples_removed,
! (*stats)->pages_deleted, (*stats)->pages_free,
pg_rusage_show(&ru0))));
}
/*
*** a/src/backend/postmaster/pgstat.c
--- b/src/backend/postmaster/pgstat.c
***************
*** 1403,1423 **** pgstat_report_autovac(Oid dboid)
*/
void
pgstat_report_vacuum(Oid tableoid, bool shared,
! PgStat_Counter livetuples, PgStat_Counter deadtuples)
{
! PgStat_MsgVacuum msg;
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
return;
! pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_VACUUM);
! msg.m_databaseid = shared ? InvalidOid : MyDatabaseId;
! msg.m_tableoid = tableoid;
! msg.m_autovacuum = IsAutoVacuumWorkerProcess();
! msg.m_vacuumtime = GetCurrentTimestamp();
! msg.m_live_tuples = livetuples;
! msg.m_dead_tuples = deadtuples;
! pgstat_send(&msg, sizeof(msg));
}
/* --------
--- 1403,1437 ----
*/
void
pgstat_report_vacuum(Oid tableoid, bool shared,
! PgStat_Counter livetuples, PgStat_Counter deadtuples,
! int nindstats, PgStat_MsgVacuum_indstate *stats)
{
! PgStat_MsgVacuum *msg;
! int i;
! int msgsize;
if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
return;
+ msgsize = offsetof(PgStat_MsgVacuum, m_indvacstates) +
+ MAXALIGN(sizeof(PgStat_MsgVacuum_indstate)) * nindstats;
! msg = (PgStat_MsgVacuum *) palloc(msgsize);
! pgstat_setheader(&msg->m_hdr, PGSTAT_MTYPE_VACUUM);
! msg->m_databaseid = shared ? InvalidOid : MyDatabaseId;
! msg->m_tableoid = tableoid;
! msg->m_autovacuum = IsAutoVacuumWorkerProcess();
! msg->m_vacuumtime = GetCurrentTimestamp();
! msg->m_live_tuples = livetuples;
! msg->m_dead_tuples = deadtuples;
! msg->m_n_indvac_states = nindstats;
!
! for (i = 0 ; i < nindstats ; i++)
! {
! msg->m_indvacstates[i].indexoid = stats[i].indexoid;
! msg->m_indvacstates[i].vac_cleanup_needed = stats[i].vac_cleanup_needed;
! }
!
! pgstat_send(msg, msgsize);
}
/* --------
***************
*** 1535,1541 **** pgstat_report_tempfile(size_t filesize)
pgstat_send(&msg, sizeof(msg));
}
!
/* ----------
* pgstat_ping() -
*
--- 1549,1561 ----
pgstat_send(&msg, sizeof(msg));
}
! bool
! pgstat_live(void)
! {
! if (pgStatSock == PGINVALID_SOCKET)
! return false;
! return true;
! }
/* ----------
* pgstat_ping() -
*
***************
*** 4587,4592 **** pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid tableoid, bool create)
--- 4607,4613 ----
result->analyze_count = 0;
result->autovac_analyze_timestamp = 0;
result->autovac_analyze_count = 0;
+ result->needs_vacuum_cleanup = true;
}
return result;
***************
*** 5718,5723 **** pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
--- 5739,5745 ----
tabentry->analyze_count = 0;
tabentry->autovac_analyze_timestamp = 0;
tabentry->autovac_analyze_count = 0;
+ tabentry->needs_vacuum_cleanup = true;
}
else
{
***************
*** 5963,5968 **** pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
--- 5985,5991 ----
{
PgStat_StatDBEntry *dbentry;
PgStat_StatTabEntry *tabentry;
+ int i;
/*
* Store the data in the table's hashtable entry.
***************
*** 5984,5989 **** pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len)
--- 6007,6023 ----
tabentry->vacuum_timestamp = msg->m_vacuumtime;
tabentry->vacuum_count++;
}
+
+ /* store index vacuum stats */
+ for (i = 0 ; i < msg->m_n_indvac_states ; i++)
+ {
+ PgStat_StatTabEntry *indtabentry;
+ Oid indoid = msg->m_indvacstates[i].indexoid;
+ bool vac_cleanup_needed = msg->m_indvacstates[i].vac_cleanup_needed;
+
+ indtabentry = pgstat_get_tab_entry(dbentry, indoid, true);
+ indtabentry->needs_vacuum_cleanup = vac_cleanup_needed;
+ }
}
/* ----------
*** a/src/backend/utils/adt/pgstatfuncs.c
--- b/src/backend/utils/adt/pgstatfuncs.c
***************
*** 27,32 ****
--- 27,33 ----
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/inet.h"
+ #include "utils/syscache.h"
#include "utils/timestamp.h"
#define UINT32_ACCESS_ONCE(var) ((uint32)(*((volatile uint32 *)&(var))))
***************
*** 328,333 **** pg_stat_get_autovacuum_count(PG_FUNCTION_ARGS)
--- 329,366 ----
}
Datum
+ pg_stat_get_vac_cleanup_needed(PG_FUNCTION_ARGS)
+ {
+ Oid relid = PG_GETARG_OID(0);
+ bool result;
+ PgStat_StatTabEntry *tabentry;
+ HeapTuple reltup;
+ bool is_index = false;
+
+ if (!pgstat_live())
+ return true;
+
+ reltup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
+ if (HeapTupleIsValid(reltup))
+ {
+ if (((Form_pg_class) GETSTRUCT(reltup))->relkind == RELKIND_INDEX)
+ is_index = true;
+
+ ReleaseSysCache(reltup);
+ }
+
+ if (!is_index)
+ PG_RETURN_NULL();
+
+ if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+ result = true;
+ else
+ result = tabentry->needs_vacuum_cleanup;
+
+ PG_RETURN_BOOL(result);
+ }
+
+ Datum
pg_stat_get_analyze_count(PG_FUNCTION_ARGS)
{
Oid relid = PG_GETARG_OID(0);
*** a/src/include/access/genam.h
--- b/src/include/access/genam.h
***************
*** 77,82 **** typedef struct IndexBulkDeleteResult
--- 77,83 ----
double tuples_removed; /* # removed during vacuum operation */
BlockNumber pages_deleted; /* # unused pages in index */
BlockNumber pages_free; /* # pages available for reuse */
+ bool no_cleanup_needed; /* true if no cleanup needed */
} IndexBulkDeleteResult;
/* Typedef for callback function to determine if a tuple is bulk-deletable */
*** a/src/include/access/nbtree.h
--- b/src/include/access/nbtree.h
***************
*** 416,421 **** typedef struct BTScanOpaqueData
--- 416,422 ----
typedef BTScanOpaqueData *BTScanOpaque;
+
/*
* We use some private sk_flags bits in preprocessed scan keys. We're allowed
* to use bits 16-31 (see skey.h). The uppermost bits are copied from the
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
***************
*** 2873,2878 **** DATA(insert OID = 3054 ( pg_stat_get_vacuum_count PGNSP PGUID 12 1 0 0 0 f f f f
--- 2873,2880 ----
DESCR("statistics: number of manual vacuums for a table");
DATA(insert OID = 3055 ( pg_stat_get_autovacuum_count PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_autovacuum_count _null_ _null_ _null_ ));
DESCR("statistics: number of auto vacuums for a table");
+ DATA(insert OID = 3419 ( pg_stat_get_vac_cleanup_needed PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 16 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_vac_cleanup_needed _null_ _null_ _null_ ));
+ DESCR("statistics: whether vacuum on a relation requires cleanup");
DATA(insert OID = 3056 ( pg_stat_get_analyze_count PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_analyze_count _null_ _null_ _null_ ));
DESCR("statistics: number of manual analyzes for a table");
DATA(insert OID = 3057 ( pg_stat_get_autoanalyze_count PGNSP PGUID 12 1 0 0 0 f f f f t f s r 1 0 20 "26" _null_ _null_ _null_ _null_ _null_ pg_stat_get_autoanalyze_count _null_ _null_ _null_ ));
*** a/src/include/pgstat.h
--- b/src/include/pgstat.h
***************
*** 360,365 **** typedef struct PgStat_MsgAutovacStart
--- 360,372 ----
* after VACUUM
* ----------
*/
+ typedef struct PgStat_MsgVacuum_indstate
+ {
+ Oid indexoid;
+ bool vac_cleanup_needed;
+ } PgStat_MsgVacuum_indstate;
+
+
typedef struct PgStat_MsgVacuum
{
PgStat_MsgHdr m_hdr;
***************
*** 369,377 **** typedef struct PgStat_MsgVacuum
TimestampTz m_vacuumtime;
PgStat_Counter m_live_tuples;
PgStat_Counter m_dead_tuples;
} PgStat_MsgVacuum;
-
/* ----------
* PgStat_MsgAnalyze Sent by the backend or autovacuum daemon
* after ANALYZE
--- 376,385 ----
TimestampTz m_vacuumtime;
PgStat_Counter m_live_tuples;
PgStat_Counter m_dead_tuples;
+ int m_n_indvac_states;
+ PgStat_MsgVacuum_indstate m_indvacstates[FLEXIBLE_ARRAY_MEMBER];
} PgStat_MsgVacuum;
/* ----------
* PgStat_MsgAnalyze Sent by the backend or autovacuum daemon
* after ANALYZE
***************
*** 641,646 **** typedef struct PgStat_StatTabEntry
--- 649,656 ----
PgStat_Counter analyze_count;
TimestampTz autovac_analyze_timestamp; /* autovacuum initiated */
PgStat_Counter autovac_analyze_count;
+
+ bool needs_vacuum_cleanup; /* This index needs vac cleanup */
} PgStat_StatTabEntry;
***************
*** 1159,1166 **** extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type t
extern void pgstat_report_autovac(Oid dboid);
extern void pgstat_report_vacuum(Oid tableoid, bool shared,
! PgStat_Counter livetuples, PgStat_Counter deadtuples);
! extern void pgstat_report_analyze(Relation rel,
PgStat_Counter livetuples, PgStat_Counter deadtuples,
bool resetcounter);
--- 1169,1177 ----
extern void pgstat_report_autovac(Oid dboid);
extern void pgstat_report_vacuum(Oid tableoid, bool shared,
! PgStat_Counter livetuples, PgStat_Counter deadtuples,
! int nindstats, PgStat_MsgVacuum_indstate *states);
! extern void pgstat_report_analyze(Relation rel,
PgStat_Counter livetuples, PgStat_Counter deadtuples,
bool resetcounter);
***************
*** 1172,1177 **** extern void pgstat_bestart(void);
--- 1183,1189 ----
extern void pgstat_report_activity(BackendState state, const char *cmd_str);
extern void pgstat_report_tempfile(size_t filesize);
+ extern bool pgstat_live(void);
extern void pgstat_report_appname(const char *appname);
extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
extern const char *pgstat_get_wait_event(uint32 wait_event_info);
*** a/src/backend/postmaster/autovacuum.c
--- b/src/backend/postmaster/autovacuum.c
***************
*** 2791,2796 **** table_recheck_autovac(Oid relid, HTAB *table_toast_map,
--- 2791,2803 ----
effective_multixact_freeze_max_age,
&dovacuum, &doanalyze, &wraparound);
+ /* force vacuum if any index on the rel is requesting cleanup scan */
+ if (!dovacuum)
+ dovacuum =
+ DatumGetBool(
+ DirectFunctionCall1(pg_stat_get_vac_cleanup_needed,
+ ObjectIdGetDatum(relid)));
+
/* ignore ANALYZE for toast tables */
if (classForm->relkind == RELKIND_TOASTVALUE)
doanalyze = false;
***************
*** 3045,3050 **** relation_needs_vacanalyze(Oid relid,
--- 3052,3064 ----
/* Determine if this table needs vacuum or analyze. */
*dovacuum = force_vacuum || (vactuples > vacthresh);
*doanalyze = (anltuples > anlthresh);
+
+ /* still force vacuum if index cleanup is requested */
+ if (!*dovacuum)
+ *dovacuum =
+ DatumGetBool(
+ DirectFunctionCall1(pg_stat_get_vac_cleanup_needed,
+ ObjectIdGetDatum(relid)));
}
else
{
*** a/src/backend/utils/adt/pgstatfuncs.c
--- b/src/backend/utils/adt/pgstatfuncs.c
***************
*** 349,361 **** pg_stat_get_vac_cleanup_needed(PG_FUNCTION_ARGS)
ReleaseSysCache(reltup);
}
if (!is_index)
! PG_RETURN_NULL();
! if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
! result = true;
else
! result = tabentry->needs_vacuum_cleanup;
PG_RETURN_BOOL(result);
}
--- 349,393 ----
ReleaseSysCache(reltup);
}
+ /*
+ * If normal relaion is specified, return true if any index of the
+ * relation is explicitly requesting cleanup.
+ */
if (!is_index)
! {
! Relation indrel;
! SysScanDesc indscan;
! HeapTuple indtup;
! result = false;
! indrel = heap_open(IndexRelationId, AccessShareLock);
! indscan = systable_beginscan(indrel, InvalidOid, false, NULL, 0, NULL);
! while (HeapTupleIsValid(indtup = systable_getnext(indscan)) &&
! !result)
! {
! Form_pg_index ind = (Form_pg_index) GETSTRUCT(indtup);
!
! if (ind->indrelid != relid)
! continue;
!
! if ((tabentry = pgstat_fetch_stat_tabentry(ind->indexrelid)))
! result |= tabentry->needs_vacuum_cleanup;
! }
! systable_endscan(indscan);
! heap_close(indrel, AccessShareLock);
! }
else
! {
! /*
! * Elsewise reutrn the status of the index. As somewhat inconsistent
! * behavior with the normal relation case above, *true* is returned
! * for indexes with no stats here.
! */
! if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
! result = true;
! else
! result = tabentry->needs_vacuum_cleanup;
! }
PG_RETURN_BOOL(result);
}
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers