On Sun, Oct 12, 2025 at 07:27:10PM +1300, David Rowley wrote:
> On Sat, 11 Oct 2025 at 07:43, Robert Haas <[email protected]> wrote:
>> I think this is a reasonable starting point, although I'm surprised
>> that you chose to combine the sub-scores using + rather than Max.
>
> Adding up the component scores doesn't make sense to me either. That
> means you could have 0.5 for inserted tuples, 0.5 for dead tuples and,
> say 0.1 for analyze threshold, which all add up to 1.1, but none of the
> component scores is high enough for auto-vacuum to have to do anything
> yet. With Max(), we'd clearly see that there's nothing to do since the
> overall score isn't >= 1.0.
In v3, I switched to Max().
> Maybe the score calculation could change when the relevant age() goes
> above vacuum_failsafe_age / vacuum_multixact_failsafe_age and start
> scaling it very aggressively beyond that. There's plenty to debate,
> but at a first cut, maybe something like the following (coded in SQL
> for ease of result viewing):
>
> select xidage as "age(relfrozenxid)",case xidage::float8 <
> current_setting('vacuum_failsafe_age')::float8 when true then xidage /
> current_setting('autovacuum_freeze_max_age')::float8 else power(xidage
> / current_setting('autovacuum_freeze_max_age')::float8,xidage::float8
> / 100_000_000) end xid_age_score from
> generate_series(0,2_000_000_000,100_000_000) xidage;
>
> which gives 1e+20 for age of 2 billion. It would take quite an
> unreasonable amount of bloat to score higher than that.
>
> I guess someone might argue that we should start taking it more
> seriously before the table's relfrozenxid age gets to
> vacuum_failsafe_age. Maybe that's true. I just don't know what. In any
> case, if a table's age gets that old, then something's probably not
> configured very well and needs attention. I did think maybe we could
> keep the addressing of auto-vacuum being configured to run too slowly
> as a separate thread.
I did something similar to this in v3, although I used the *_freeze_max_age
parameters as the point to start scaling aggressively, and I simply raised
the score to the power of 10.
I've yet to do any real testing with this stuff.
--
nathan
From a54fce8b43134381e504cf3f7fd4a5d4fff8415e Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Fri, 10 Oct 2025 12:28:37 -0500
Subject: [PATCH v3 1/1] autovacuum scheduling improvements
---
src/backend/postmaster/autovacuum.c | 102 ++++++++++++++++++++++++----
src/tools/pgindent/typedefs.list | 1 +
2 files changed, 88 insertions(+), 15 deletions(-)
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 5084af7dfb6..075d4e87a86 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -97,6 +97,7 @@
#include "storage/procsignal.h"
#include "storage/smgr.h"
#include "tcop/tcopprot.h"
+#include "utils/float.h"
#include "utils/fmgroids.h"
#include "utils/fmgrprotos.h"
#include "utils/guc_hooks.h"
@@ -310,6 +311,12 @@ static AutoVacuumShmemStruct *AutoVacuumShmem;
static dlist_head DatabaseList = DLIST_STATIC_INIT(DatabaseList);
static MemoryContext DatabaseListCxt = NULL;
+typedef struct
+{
+ Oid oid; /* relation OID collected by do_autovacuum() */
+ double score; /* priority reported by relation_needs_vacanalyze(); list is sorted on this */
+} TableToProcess;
+
/*
* Dummy pointer to persuade Valgrind that we've not leaked the array of
* avl_dbase structs. Make it global to ensure the compiler doesn't
@@ -351,7 +358,8 @@ static void relation_needs_vacanalyze(Oid relid,
AutoVacOpts *relopts,
Form_pg_class classForm,
PgStat_StatTabEntry *tabentry,
int
effective_multixact_freeze_max_age,
- bool
*dovacuum, bool *doanalyze, bool *wraparound);
+ bool
*dovacuum, bool *doanalyze, bool *wraparound,
+
double *score);
static void autovacuum_do_vac_analyze(autovac_table *tab,
BufferAccessStrategy bstrategy);
@@ -1889,6 +1897,15 @@ get_database_list(void)
return dblist;
}
+static int
+TableToProcessComparator(const ListCell *a, const ListCell *b)
+{
+ TableToProcess *t1 = (TableToProcess *) lfirst(a);
+ TableToProcess *t2 = (TableToProcess *) lfirst(b);
+
+ return float8_cmp_internal(t2->score, t1->score); /* args swapped (b vs. a), presumably so higher scores sort first */
+}
+
/*
* Process a database table-by-table
*
@@ -1902,7 +1919,7 @@ do_autovacuum(void)
HeapTuple tuple;
TableScanDesc relScan;
Form_pg_database dbForm;
- List *table_oids = NIL;
+ List *tables_to_process = NIL;
List *orphan_oids = NIL;
HASHCTL ctl;
HTAB *table_toast_map;
@@ -2014,6 +2031,7 @@ do_autovacuum(void)
bool dovacuum;
bool doanalyze;
bool wraparound;
+ double score = 0.0;
if (classForm->relkind != RELKIND_RELATION &&
classForm->relkind != RELKIND_MATVIEW)
@@ -2054,11 +2072,19 @@ do_autovacuum(void)
/* Check if it needs vacuum or analyze */
relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
effective_multixact_freeze_max_age,
- &dovacuum,
&doanalyze, &wraparound);
+ &dovacuum,
&doanalyze, &wraparound,
+ &score);
- /* Relations that need work are added to table_oids */
+ /* Relations that need work are added to tables_to_process */
if (dovacuum || doanalyze)
- table_oids = lappend_oid(table_oids, relid);
+ {
+ TableToProcess *table = palloc(sizeof(TableToProcess));
+
+ table->oid = relid;
+ table->score = score;
+
+ tables_to_process = lappend(tables_to_process, table);
+ }
/*
* Remember TOAST associations for the second pass. Note: we
must do
@@ -2114,6 +2140,7 @@ do_autovacuum(void)
bool dovacuum;
bool doanalyze;
bool wraparound;
+ double score = 0.0;
/*
* We cannot safely process other backends' temp tables, so
skip 'em.
@@ -2146,11 +2173,19 @@ do_autovacuum(void)
relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
effective_multixact_freeze_max_age,
- &dovacuum,
&doanalyze, &wraparound);
+ &dovacuum,
&doanalyze, &wraparound,
+ &score);
/* ignore analyze for toast tables */
if (dovacuum)
- table_oids = lappend_oid(table_oids, relid);
+ {
+ TableToProcess *table = palloc(sizeof(TableToProcess));
+
+ table->oid = relid;
+ table->score = score;
+
+ tables_to_process = lappend(tables_to_process, table);
+ }
/* Release stuff to avoid leakage */
if (free_relopts)
@@ -2274,6 +2309,8 @@ do_autovacuum(void)
MemoryContextSwitchTo(AutovacMemCxt);
}
+ list_sort(tables_to_process, TableToProcessComparator);
+
/*
* Optionally, create a buffer access strategy object for VACUUM to use.
* We use the same BufferAccessStrategy object for all tables VACUUMed
by
@@ -2302,9 +2339,9 @@ do_autovacuum(void)
/*
* Perform operations on collected tables.
*/
- foreach(cell, table_oids)
+ foreach_ptr(TableToProcess, table, tables_to_process)
{
- Oid relid = lfirst_oid(cell);
+ Oid relid = table->oid;
HeapTuple classTup;
autovac_table *tab;
bool isshared;
@@ -2535,7 +2572,7 @@ deleted:
pg_atomic_test_set_flag(&MyWorkerInfo->wi_dobalance);
}
- list_free(table_oids);
+ list_free_deep(tables_to_process);
/*
* Perform additional work items, as requested by backends.
@@ -2934,6 +2971,7 @@ recheck_relation_needs_vacanalyze(Oid relid,
bool
*wraparound)
{
PgStat_StatTabEntry *tabentry;
+ double score;
/* fetch the pgstat table entry */
tabentry = pgstat_fetch_stat_tabentry_ext(classForm->relisshared,
@@ -2941,7 +2979,8 @@ recheck_relation_needs_vacanalyze(Oid relid,
relation_needs_vacanalyze(relid, avopts, classForm, tabentry,
effective_multixact_freeze_max_age,
- dovacuum, doanalyze,
wraparound);
+ dovacuum, doanalyze,
wraparound,
+ &score);
/* Release tabentry to avoid leakage */
if (tabentry)
@@ -3000,7 +3039,8 @@ relation_needs_vacanalyze(Oid relid,
/* output params below */
bool *dovacuum,
bool *doanalyze,
- bool *wraparound)
+ bool *wraparound,
+ double *score)
{
bool force_vacuum;
bool av_enabled;
@@ -3029,7 +3069,10 @@ relation_needs_vacanalyze(Oid relid,
int multixact_freeze_max_age;
TransactionId xidForceLimit;
TransactionId relfrozenxid;
+ TransactionId relminmxid;
MultiXactId multiForceLimit;
+ double xid_score;
+ double mxid_score;
Assert(classForm != NULL);
Assert(OidIsValid(relid));
@@ -3081,17 +3124,17 @@ relation_needs_vacanalyze(Oid relid,
av_enabled = (relopts ? relopts->enabled : true);
+ relfrozenxid = classForm->relfrozenxid;
+ relminmxid = classForm->relminmxid;
+
/* Force vacuum if table is at risk of wraparound */
xidForceLimit = recentXid - freeze_max_age;
if (xidForceLimit < FirstNormalTransactionId)
xidForceLimit -= FirstNormalTransactionId;
- relfrozenxid = classForm->relfrozenxid;
force_vacuum = (TransactionIdIsNormal(relfrozenxid) &&
TransactionIdPrecedes(relfrozenxid,
xidForceLimit));
if (!force_vacuum)
{
- MultiXactId relminmxid = classForm->relminmxid;
-
multiForceLimit = recentMulti - multixact_freeze_max_age;
if (multiForceLimit < FirstMultiXactId)
multiForceLimit -= FirstMultiXactId;
@@ -3100,6 +3143,23 @@ relation_needs_vacanalyze(Oid relid,
}
*wraparound = force_vacuum;
+ /*
+ * (M)XID portion of the score: the *age* (distance from recentXid/recentMulti, not the raw counter value)
+ * divided by the matching *_freeze_max_age. Invalid values score 0; the signed cast keeps a stale recent* from wrapping.
+ */
+ xid_score = TransactionIdIsNormal(relfrozenxid) ? (double) (int32) (recentXid - relfrozenxid) / freeze_max_age : 0.0;
+ mxid_score = MultiXactIdIsValid(relminmxid) ? (double) (int32) (recentMulti - relminmxid) / multixact_freeze_max_age : 0.0;
+ *score = Max(xid_score, mxid_score);
+
+ /*
+ * To ensure tables are given increased priority once they surpass their
+ * *_freeze_max_age parameters, we raise the score to the power of 10. This
+ * ensures that the age becomes the main priority the closer we are to
+ * wraparound.
+ */
+ if (*score > 1.0)
+ *score = pow(*score, 10.0);
+
/* User disabled it in pg_class.reloptions? (But ignore if at risk) */
if (!av_enabled && !force_vacuum)
{
@@ -3173,6 +3233,18 @@ relation_needs_vacanalyze(Oid relid,
*dovacuum = force_vacuum || (vactuples > vacthresh) ||
(vac_ins_base_thresh >= 0 && instuples > vacinsthresh);
*doanalyze = (anltuples > anlthresh);
+
+ /*
+ * Update the score, being careful to avoid division by zero
and to
+ * skip cases where auto-analyze does not apply, i.e.,
pg_statistic
+ * and TOAST tables.
+ */
+ *score = Max(*score, (double) vactuples / Max(vacthresh, 1));
+ if (relid != StatisticRelationId &&
+ classForm->relkind != RELKIND_TOASTVALUE)
+ *score = Max(*score, (double) anltuples /
Max(anlthresh, 1));
+ if (vac_ins_base_thresh >= 0)
+ *score = Max(*score, (double) instuples /
Max(vacinsthresh, 1));
}
else
{
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 377a7946585..3389cf5ebae 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3007,6 +3007,7 @@ TableScanDesc
TableScanDescData
TableSpaceCacheEntry
TableSpaceOpts
+TableToProcess
TablespaceList
TablespaceListCell
TapeBlockTrailer
--
2.39.5 (Apple Git-154)