Hi, I have a bug pending that autovacuum fails to give priority to for-wraparound tables. When xid consumption rate is high and dead tuple creation is also high, it is possible that some tables are waiting for for-wraparound vacuums that don't complete in time because the workers are busy processing other tables that have accumulated dead tuples; the system is then down because it's too near the Xid wraparound horizon. Apparently this is particularly notorious in connection with TOAST tables, because those are always put in the tables-to-process list after regular tables.
(As far as I recall, this was already reported elsewhere, but so far I have been unable to find the discussion in the archives. Pointers appreciated.) So here's a small, backpatchable patch that sorts the list of tables to process (not all that much tested yet). Tables which have the wraparound flag set are processed before those that are not. Other than this criterion, the order is not defined. Now we could implement this differently, and maybe more simply (say by keeping two lists of tables to process, one with for-wraparound tables and one with the rest) but this way it is simpler to add additional sorting criteria later: say within each category we could first process smaller tables that have more dead tuples. My intention is to clean this up and backpatch to all live branches. Comments? -- Álvaro Herrera http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Training & Services
*** a/src/backend/postmaster/autovacuum.c --- b/src/backend/postmaster/autovacuum.c *************** *** 1890,1895 **** get_database_list(void) --- 1890,1919 ---- return dblist; } + typedef struct vacuum_table + { + Oid reloid; + bool wraparound; + } vacuum_table; + + /* + * qsort comparator. Tables marked for wraparound sort first. + */ + static int + vacpriority_comparator(const void *a, const void *b) + { + const vacuum_table *taba = *(vacuum_table *const *) a; + const vacuum_table *tabb = *(vacuum_table *const *) b; + + if (taba->wraparound == tabb->wraparound) + return 0; + + if (taba->wraparound) + return -1; + else + return 1; + } + /* * Process a database table-by-table * *************** *** 1903,1917 **** do_autovacuum(void) HeapTuple tuple; HeapScanDesc relScan; Form_pg_database dbForm; - List *table_oids = NIL; HASHCTL ctl; HTAB *table_toast_map; - ListCell *volatile cell; PgStat_StatDBEntry *shared; PgStat_StatDBEntry *dbentry; BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; /* * StartTransactionCommand and CommitTransactionCommand will automatically --- 1927,1943 ---- HeapTuple tuple; HeapScanDesc relScan; Form_pg_database dbForm; HASHCTL ctl; HTAB *table_toast_map; PgStat_StatDBEntry *shared; PgStat_StatDBEntry *dbentry; BufferAccessStrategy bstrategy; ScanKeyData key; TupleDesc pg_class_desc; + vacuum_table **tables; + int ntables; + int maxtables; + int i; /* * StartTransactionCommand and CommitTransactionCommand will automatically *************** *** 1986,1991 **** do_autovacuum(void) --- 2012,2022 ---- &ctl, HASH_ELEM | HASH_FUNCTION); + /* initialize our tasklist */ + ntables = 0; + maxtables = 32; + tables = palloc(maxtables * sizeof(vacuum_table *)); + /* * Scan pg_class to determine which tables to vacuum. * *************** *** 2077,2085 **** do_autovacuum(void) } else { ! /* relations that need work are added to table_oids */ if (dovacuum || doanalyze) ! table_oids = lappend_oid(table_oids, relid); /* * Remember the association for the second pass. Note: we must do --- 2108,2133 ---- } else { ! /* relations that need work are added to our tasklist */ if (dovacuum || doanalyze) ! { ! vacuum_table *tab; ! ! /* enlarge the array if necessary */ ! if (ntables >= maxtables) ! { ! maxtables *= 2; ! tables = repalloc(tables, ! maxtables * sizeof(vacuum_table *)); ! } ! ! tab = palloc(sizeof(vacuum_table)); ! ! tab->reloid = relid; ! tab->wraparound = wraparound; ! ! tables[ntables++] = tab; ! } /* * Remember the association for the second pass. Note: we must do *************** *** 2162,2168 **** do_autovacuum(void) /* ignore analyze for toast tables */ if (dovacuum) ! table_oids = lappend_oid(table_oids, relid); } heap_endscan(relScan); --- 2210,2233 ---- /* ignore analyze for toast tables */ if (dovacuum) ! { ! vacuum_table *tab; ! ! /* enlarge the array if necessary */ ! if (ntables >= maxtables) ! { ! maxtables *= 2; ! tables = repalloc(tables, ! maxtables * sizeof(vacuum_table *)); ! } ! ! tab = palloc(sizeof(vacuum_table)); ! ! tab->reloid = relid; ! tab->wraparound = wraparound; ! ! tables[ntables++] = tab; ! } } heap_endscan(relScan); *************** *** 2185,2196 **** do_autovacuum(void) ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_MAXSIZE); /* * Perform operations on collected tables. */ ! foreach(cell, table_oids) { - Oid relid = lfirst_oid(cell); autovac_table *tab; bool skipit; int stdVacuumCostDelay; --- 2250,2264 ---- ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_MAXSIZE); + /* sort our task list */ + qsort(tables, ntables, sizeof(vacuum_table *), + vacpriority_comparator); + /* * Perform operations on collected tables. */ ! for (i = 0; i < ntables; i++) { autovac_table *tab; bool skipit; int stdVacuumCostDelay; *************** *** 2224,2230 **** do_autovacuum(void) if (worker->wi_dboid != MyDatabaseId) continue; ! if (worker->wi_tableoid == relid) { skipit = true; break; --- 2292,2298 ---- if (worker->wi_dboid != MyDatabaseId) continue; ! if (worker->wi_tableoid == tables[i]->reloid) { skipit = true; break; *************** *** 2248,2254 **** do_autovacuum(void) * the race condition is not closed but it is very small. */ MemoryContextSwitchTo(AutovacMemCxt); ! tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc); if (tab == NULL) { /* someone else vacuumed the table, or it went away */ --- 2316,2323 ---- * the race condition is not closed but it is very small. */ MemoryContextSwitchTo(AutovacMemCxt); ! tab = table_recheck_autovac(tables[i]->reloid, table_toast_map, ! pg_class_desc); if (tab == NULL) { /* someone else vacuumed the table, or it went away */ *************** *** 2260,2266 **** do_autovacuum(void) * Ok, good to go. Store the table in shared memory before releasing * the lock so that other workers don't vacuum it concurrently. */ ! MyWorkerInfo->wi_tableoid = relid; LWLockRelease(AutovacuumScheduleLock); /* --- 2329,2335 ---- * Ok, good to go. Store the table in shared memory before releasing * the lock so that other workers don't vacuum it concurrently. */ ! MyWorkerInfo->wi_tableoid = tables[i]->reloid; LWLockRelease(AutovacuumScheduleLock); /* *************** *** 2361,2366 **** deleted: --- 2430,2436 ---- if (tab->at_relname != NULL) pfree(tab->at_relname); pfree(tab); + pfree(tables[i]); /* * Remove my info from shared memory. We could, but intentionally
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers