Still catching up on the latest discussion, but here's a v8 patch that
amends the DEBUG3 in relation_needs_vacanalyze() to also log the score.  I
might attempt to add some sort of brief documentation about autovacuum
prioritization next.

>From skimming the latest discussion, I gather we might want to consider
re-sorting the list periodically.  Is the idea that we'll re-sort the
remaining tables in the list, or that we'll basically restart
do_autovacuum()?  If it's the latter, then we'll need to come up with some
way to decide when to stop for the current database.  Right now, we just go
through pg_class and call it a day.

-- 
nathan
>From fdb05afb9445dd00f4cad9071ef51af4f22854b0 Mon Sep 17 00:00:00 2001
From: Nathan Bossart <[email protected]>
Date: Fri, 10 Oct 2025 12:28:37 -0500
Subject: [PATCH v8 1/1] autovacuum scheduling improvements

---
 src/backend/postmaster/autovacuum.c | 211 +++++++++++++++++++++-------
 src/tools/pgindent/typedefs.list    |   1 +
 2 files changed, 165 insertions(+), 47 deletions(-)

diff --git a/src/backend/postmaster/autovacuum.c 
b/src/backend/postmaster/autovacuum.c
index ed19c74bb19..326055f962f 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -310,6 +310,12 @@ static AutoVacuumShmemStruct *AutoVacuumShmem;
 static dlist_head DatabaseList = DLIST_STATIC_INIT(DatabaseList);
 static MemoryContext DatabaseListCxt = NULL;
 
+typedef struct
+{
+       Oid                     oid;
+       double          score;
+} TableToProcess;
+
 /*
  * Dummy pointer to persuade Valgrind that we've not leaked the array of
  * avl_dbase structs.  Make it global to ensure the compiler doesn't
@@ -351,7 +357,8 @@ static void relation_needs_vacanalyze(Oid relid, 
AutoVacOpts *relopts,
                                                                          
Form_pg_class classForm,
                                                                          
PgStat_StatTabEntry *tabentry,
                                                                          int 
effective_multixact_freeze_max_age,
-                                                                         bool 
*dovacuum, bool *doanalyze, bool *wraparound);
+                                                                         bool 
*dovacuum, bool *doanalyze, bool *wraparound,
+                                                                         
double *score);
 
 static void autovacuum_do_vac_analyze(autovac_table *tab,
                                                                          
BufferAccessStrategy bstrategy);
@@ -1889,6 +1896,15 @@ get_database_list(void)
        return dblist;
 }
 
+static int
+TableToProcessComparator(const ListCell *a, const ListCell *b)
+{
+       TableToProcess *t1 = (TableToProcess *) lfirst(a);
+       TableToProcess *t2 = (TableToProcess *) lfirst(b);
+
+       return (t2->score < t1->score) ? -1 : (t2->score > t1->score) ? 1 : 0;
+}
+
 /*
  * Process a database table-by-table
  *
@@ -1902,7 +1918,7 @@ do_autovacuum(void)
        HeapTuple       tuple;
        TableScanDesc relScan;
        Form_pg_database dbForm;
-       List       *table_oids = NIL;
+       List       *tables_to_process = NIL;
        List       *orphan_oids = NIL;
        HASHCTL         ctl;
        HTAB       *table_toast_map;
@@ -2014,6 +2030,7 @@ do_autovacuum(void)
                bool            dovacuum;
                bool            doanalyze;
                bool            wraparound;
+               double          score = 0.0;
 
                if (classForm->relkind != RELKIND_RELATION &&
                        classForm->relkind != RELKIND_MATVIEW)
@@ -2054,11 +2071,19 @@ do_autovacuum(void)
                /* Check if it needs vacuum or analyze */
                relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
                                                                  
effective_multixact_freeze_max_age,
-                                                                 &dovacuum, 
&doanalyze, &wraparound);
+                                                                 &dovacuum, 
&doanalyze, &wraparound,
+                                                                 &score);
 
-               /* Relations that need work are added to table_oids */
+               /* Relations that need work are added to tables_to_process */
                if (dovacuum || doanalyze)
-                       table_oids = lappend_oid(table_oids, relid);
+               {
+                       TableToProcess *table = palloc(sizeof(TableToProcess));
+
+                       table->oid = relid;
+                       table->score = score;
+
+                       tables_to_process = lappend(tables_to_process, table);
+               }
 
                /*
                 * Remember TOAST associations for the second pass.  Note: we 
must do
@@ -2114,6 +2139,7 @@ do_autovacuum(void)
                bool            dovacuum;
                bool            doanalyze;
                bool            wraparound;
+               double          score = 0.0;
 
                /*
                 * We cannot safely process other backends' temp tables, so 
skip 'em.
@@ -2146,11 +2172,19 @@ do_autovacuum(void)
 
                relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
                                                                  
effective_multixact_freeze_max_age,
-                                                                 &dovacuum, 
&doanalyze, &wraparound);
+                                                                 &dovacuum, 
&doanalyze, &wraparound,
+                                                                 &score);
 
                /* ignore analyze for toast tables */
                if (dovacuum)
-                       table_oids = lappend_oid(table_oids, relid);
+               {
+                       TableToProcess *table = palloc(sizeof(TableToProcess));
+
+                       table->oid = relid;
+                       table->score = score;
+
+                       tables_to_process = lappend(tables_to_process, table);
+               }
 
                /* Release stuff to avoid leakage */
                if (free_relopts)
@@ -2274,6 +2308,8 @@ do_autovacuum(void)
                MemoryContextSwitchTo(AutovacMemCxt);
        }
 
+       list_sort(tables_to_process, TableToProcessComparator);
+
        /*
         * Optionally, create a buffer access strategy object for VACUUM to use.
         * We use the same BufferAccessStrategy object for all tables VACUUMed 
by
@@ -2302,9 +2338,9 @@ do_autovacuum(void)
        /*
         * Perform operations on collected tables.
         */
-       foreach(cell, table_oids)
+       foreach_ptr(TableToProcess, table, tables_to_process)
        {
-               Oid                     relid = lfirst_oid(cell);
+               Oid                     relid = table->oid;
                HeapTuple       classTup;
                autovac_table *tab;
                bool            isshared;
@@ -2535,7 +2571,7 @@ deleted:
                pg_atomic_test_set_flag(&MyWorkerInfo->wi_dobalance);
        }
 
-       list_free(table_oids);
+       list_free_deep(tables_to_process);
 
        /*
         * Perform additional work items, as requested by backends.
@@ -2937,6 +2973,7 @@ recheck_relation_needs_vacanalyze(Oid relid,
                                                                  bool 
*wraparound)
 {
        PgStat_StatTabEntry *tabentry;
+       double          score;
 
        /* fetch the pgstat table entry */
        tabentry = pgstat_fetch_stat_tabentry_ext(classForm->relisshared,
@@ -2944,15 +2981,12 @@ recheck_relation_needs_vacanalyze(Oid relid,
 
        relation_needs_vacanalyze(relid, avopts, classForm, tabentry,
                                                          
effective_multixact_freeze_max_age,
-                                                         dovacuum, doanalyze, 
wraparound);
+                                                         dovacuum, doanalyze, 
wraparound,
+                                                         &score);
 
        /* Release tabentry to avoid leakage */
        if (tabentry)
                pfree(tabentry);
-
-       /* ignore ANALYZE for toast tables */
-       if (classForm->relkind == RELKIND_TOASTVALUE)
-               *doanalyze = false;
 }
 
 /*
@@ -2993,6 +3027,32 @@ recheck_relation_needs_vacanalyze(Oid relid,
  * autovacuum_vacuum_threshold GUC variable.  Similarly, a vac_scale_factor
  * value < 0 is substituted with the value of
  * autovacuum_vacuum_scale_factor GUC variable.  Ditto for analyze.
+ *
+ * This function also returns a score that can be used to sort the list of
+ * tables to process.  The idea is to have autovacuum prioritize tables that
+ * are furthest beyond their thresholds (e.g., a table nearing transaction ID
+ * wraparound should be vacuumed first).  This prioritization scheme is
+ * certainly far from perfect; there are simply too many possibilities for any
+ * scoring technique to work across all workloads, and the situation might
+ * change significantly between the time we calculate the score and the time
+ * that autovacuum gets to processing it.  However, we have attempted to
+ * develop something that is expected to work for a large portion of workloads
+ * with reasonable parameter settings.
+ *
+ * The score is calculated as the maximum of the ratios of each of the table's
+ * relevant values to its threshold.  For example, if the number of inserted
+ * tuples is 100, and the insert threshold for the table is 80, the insert
+ * score is 1.25.  If all other scores are below that value, the returned score
+ * will be 1.25.  The other criteria considered for the score are the table
+ * ages (both relfrozenxid and relminmxid) compared to the corresponding
+ * freeze-max-age setting, the number of updated/deleted tuples compared to the
+ * vacuum threshold, and the number of inserted/updated/deleted tuples compared
+ * to the analyze threshold.
+ *
+ * One exception to the previous paragraph is for tables nearing wraparound,
+ * i.e., those that have surpassed the effective failsafe ages.  In that case,
+ * the relfrozen/relminmxid-based score is scaled aggressively so that the
+ * table has a decent chance of sorting to the top of the list.
  */
 static void
 relation_needs_vacanalyze(Oid relid,
@@ -3003,7 +3063,8 @@ relation_needs_vacanalyze(Oid relid,
  /* output params below */
                                                  bool *dovacuum,
                                                  bool *doanalyze,
-                                                 bool *wraparound)
+                                                 bool *wraparound,
+                                                 double *score)
 {
        bool            force_vacuum;
        bool            av_enabled;
@@ -3032,11 +3093,16 @@ relation_needs_vacanalyze(Oid relid,
        int                     multixact_freeze_max_age;
        TransactionId xidForceLimit;
        TransactionId relfrozenxid;
+       TransactionId relminmxid;
        MultiXactId multiForceLimit;
 
        Assert(classForm != NULL);
        Assert(OidIsValid(relid));
 
+       /* initialize variables that aren't guaranteed to be set below */
+       *score = 0.0;
+       *doanalyze = false;
+
        /*
         * Determine vacuum/analyze equation parameters.  We have two possible
         * sources: the passed reloptions (which could be a main table or a 
toast
@@ -3084,32 +3150,73 @@ relation_needs_vacanalyze(Oid relid,
 
        av_enabled = (relopts ? relopts->enabled : true);
 
+       relfrozenxid = classForm->relfrozenxid;
+       relminmxid = classForm->relminmxid;
+
        /* Force vacuum if table is at risk of wraparound */
        xidForceLimit = recentXid - freeze_max_age;
        if (xidForceLimit < FirstNormalTransactionId)
                xidForceLimit -= FirstNormalTransactionId;
-       relfrozenxid = classForm->relfrozenxid;
        force_vacuum = (TransactionIdIsNormal(relfrozenxid) &&
                                        TransactionIdPrecedes(relfrozenxid, 
xidForceLimit));
        if (!force_vacuum)
        {
-               MultiXactId relminmxid = classForm->relminmxid;
-
                multiForceLimit = recentMulti - multixact_freeze_max_age;
                if (multiForceLimit < FirstMultiXactId)
                        multiForceLimit -= FirstMultiXactId;
                force_vacuum = MultiXactIdIsValid(relminmxid) &&
                        MultiXactIdPrecedes(relminmxid, multiForceLimit);
        }
-       *wraparound = force_vacuum;
+       *wraparound = *dovacuum = force_vacuum;
+
+       /* Update the score. */
+       if (force_vacuum)
+       {
+               Oid                     xid_age;
+               Oid                     mxid_age;
+               double          xid_score;
+               double          mxid_score;
+               int                     effective_xid_failsafe_age;
+               int                     effective_mxid_failsafe_age;
+
+               /*
+                * To calculate the (M)XID age portion of the score, divide the 
age by
+                * its respective *_freeze_max_age parameter.
+                */
+               xid_age = TransactionIdIsNormal(relfrozenxid) ? recentXid - 
relfrozenxid : 0;
+               mxid_age = MultiXactIdIsValid(relminmxid) ? recentMulti - 
relminmxid : 0;
+
+               xid_score = (double) xid_age / freeze_max_age;
+               mxid_score = (double) mxid_age / multixact_freeze_max_age;
+
+               /*
+                * To ensure tables are given increased priority once they begin
+                * approaching wraparound, we scale the score aggressively if 
the ages
+                * surpass vacuum_failsafe_age or vacuum_multixact_failsafe_age.
+                *
+                * As in vacuum_xid_failsafe_check(), the effective failsafe 
age is no
+                * less than 105% the value of the respective *_freeze_max_age
+                * parameter.  Note that per-table settings could result in a 
low
+                * score even if the table surpasses the failsafe settings.  
However,
+                * this is a strange enough corner case that we don't bother 
trying to
+                * handle it.
+                */
+               effective_xid_failsafe_age = Max(vacuum_failsafe_age,
+                                                                               
 autovacuum_freeze_max_age * 1.05);
+               effective_mxid_failsafe_age = Max(vacuum_multixact_failsafe_age,
+                                                                               
  autovacuum_multixact_freeze_max_age * 1.05);
+
+               if (xid_age >= effective_xid_failsafe_age)
+                       xid_score = pow(xid_score, Max(1.0, (double) xid_age / 
100000000));
+               if (mxid_age >= effective_mxid_failsafe_age)
+                       mxid_score = pow(mxid_score, Max(1.0, (double) mxid_age 
/ 100000000));
+
+               *score = Max(xid_score, mxid_score);
+       }
 
        /* User disabled it in pg_class.reloptions?  (But ignore if at risk) */
        if (!av_enabled && !force_vacuum)
-       {
-               *doanalyze = false;
-               *dovacuum = false;
                return;
-       }
 
        /*
         * If we found stats for the table, and autovacuum is currently enabled,
@@ -3163,34 +3270,44 @@ relation_needs_vacanalyze(Oid relid,
                 * reset, because if that happens, the last vacuum and analyze 
counts
                 * will be reset too.
                 */
+
+               /*
+                * Determine if this table needs vacuum, and update the score 
if it
+                * does.
+                */
+               if (vactuples > vacthresh)
+               {
+                       *dovacuum = true;
+                       *score = Max(*score, (double) vactuples / 
Max(vacthresh, 1));
+               }
+
+               if (vac_ins_base_thresh >= 0 && instuples > vacinsthresh)
+               {
+                       *dovacuum = true;
+                       *score = Max(*score, (double) instuples / 
Max(vacinsthresh, 1));
+               }
+
+               /*
+                * Determine if this table needs analyze, and update the score 
if it
+                * does.  Note that we don't analyze TOAST tables and 
pg_statistic.
+                */
+               if (anltuples > anlthresh &&
+                       relid != StatisticRelationId &&
+                       classForm->relkind != RELKIND_TOASTVALUE)
+               {
+                       *doanalyze = true;
+                       *score = Max(*score, (double) anltuples / 
Max(anlthresh, 1));
+               }
+
                if (vac_ins_base_thresh >= 0)
-                       elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), ins: %.0f 
(threshold %.0f), anl: %.0f (threshold %.0f)",
+                       elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), ins: %.0f 
(threshold %.0f), anl: %.0f (threshold %.0f), score: %.3f",
                                 NameStr(classForm->relname),
-                                vactuples, vacthresh, instuples, vacinsthresh, 
anltuples, anlthresh);
+                                vactuples, vacthresh, instuples, vacinsthresh, 
anltuples, anlthresh, *score);
                else
-                       elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), ins: 
(disabled), anl: %.0f (threshold %.0f)",
+                       elog(DEBUG3, "%s: vac: %.0f (threshold %.0f), ins: 
(disabled), anl: %.0f (threshold %.0f), score %.3f",
                                 NameStr(classForm->relname),
-                                vactuples, vacthresh, anltuples, anlthresh);
-
-               /* Determine if this table needs vacuum or analyze. */
-               *dovacuum = force_vacuum || (vactuples > vacthresh) ||
-                       (vac_ins_base_thresh >= 0 && instuples > vacinsthresh);
-               *doanalyze = (anltuples > anlthresh);
-       }
-       else
-       {
-               /*
-                * Skip a table not found in stat hash, unless we have to force 
vacuum
-                * for anti-wrap purposes.  If it's not acted upon, there's no 
need to
-                * vacuum it.
-                */
-               *dovacuum = force_vacuum;
-               *doanalyze = false;
+                                vactuples, vacthresh, anltuples, anlthresh, 
*score);
        }
-
-       /* ANALYZE refuses to work with pg_statistic */
-       if (relid == StatisticRelationId)
-               *doanalyze = false;
 }
 
 /*
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 432509277c9..3d2b5b6925a 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3010,6 +3010,7 @@ TableScanDesc
 TableScanDescData
 TableSpaceCacheEntry
 TableSpaceOpts
+TableToProcess
 TablespaceList
 TablespaceListCell
 TapeBlockTrailer
-- 
2.39.5 (Apple Git-154)

Reply via email to