Hi, The memory consumption of VACUUM has some issues and could be improved; some of its limitations are recorded in the comments of the “vacuumlazy.c” file. In the current design, VACUUM stores the TIDs of dead tuples in a single fixed-size array that is allocated at the start, sized from maintenance_work_mem. There are three problems with that design:
- If the value of maintenance_work_mem is too large, then memory is wasted for small tables. - If the value of maintenance_work_mem is too small, or the TIDs do not fit in the array, then multiple index scans happen. - If maintenance_work_mem is set too large and many vacuums run at the same time (presumably “vacuume_count” here means the number of concurrent vacuum workers — please confirm), then the system can run out of memory. There are two possible solutions to these problems. The first is to use a list instead of a fixed-size array. The second is to allocate the memory in chunks. The attached WIP patch creates an array of ItemPointer chunks and allocates memory in chunks by dividing maintenance_work_mem into multiple chunks. Comments? -- Ibrar Ahmed
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c index a3c4a1df3b..73922a2e34 100644 --- a/src/backend/access/heap/vacuumlazy.c +++ b/src/backend/access/heap/vacuumlazy.c @@ -110,6 +110,8 @@ */ #define PREFETCH_SIZE ((BlockNumber) 32) +#define MAX_ARRAY_COUNT 100 + typedef struct LVRelStats { /* useindex = true means two-pass strategy; false means one-pass */ @@ -130,9 +132,12 @@ typedef struct LVRelStats BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */ /* List of TIDs of tuples we intend to delete */ /* NB: this list is ordered by TID address */ - int num_dead_tuples; /* current # of entries */ - int max_dead_tuples; /* # slots allocated in array */ - ItemPointer dead_tuples; /* array of ItemPointerData */ + int num_dead_tuples; /* current # of entries */ + int max_dead_tuples; /* # slots allocated in array */ + int num_dead_tuples_array; /* current # of arrays */ + int num_dead_tuples_per_array; /* current # of entries per array */ + int max_dead_tuples_per_array; /* maximum # of entries per array */ + ItemPointer *dead_tuples; /* array of ItemPointerData */ int num_index_scans; TransactionId latestRemovedXid; bool lock_waiter_detected; @@ -148,6 +153,8 @@ static MultiXactId MultiXactCutoff; static BufferAccessStrategy vac_strategy; +static void *pg_bsearch(ItemPointer key, void **base, int num, int li, + int (*cmp)(const void *key, const void *elt)); /* non-export function prototypes */ static void lazy_scan_heap(Relation onerel, VacuumParams *params, @@ -162,7 +169,7 @@ static void lazy_cleanup_index(Relation indrel, IndexBulkDeleteResult *stats, LVRelStats *vacrelstats); static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, - int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer); + int arryindex, int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer); static bool should_attempt_truncation(VacuumParams *params, LVRelStats *vacrelstats); static void 
lazy_truncate_heap(Relation onerel, LVRelStats *vacrelstats); @@ -740,6 +747,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, if ((vacrelstats->max_dead_tuples - vacrelstats->num_dead_tuples) < MaxHeapTuplesPerPage && vacrelstats->num_dead_tuples > 0) { + int i; const int hvp_index[] = { PROGRESS_VACUUM_PHASE, PROGRESS_VACUUM_NUM_INDEX_VACUUMS @@ -789,7 +797,14 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * not to reset latestRemovedXid since we want that value to be * valid. */ + for (i = 0; i < vacrelstats->num_dead_tuples_array; i++) + { + pfree(vacrelstats->dead_tuples[i]); + vacrelstats->dead_tuples[i] = NULL; + } + vacrelstats->num_dead_tuples_array = 0; vacrelstats->num_dead_tuples = 0; + vacrelstats->num_dead_tuples_per_array = 0; vacrelstats->num_index_scans++; /* @@ -1242,10 +1257,12 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, */ if (!vacrelstats->useindex && vacrelstats->num_dead_tuples > 0) { + int i; + if (nindexes == 0) { /* Remove tuples from heap if the table has no index */ - lazy_vacuum_page(onerel, blkno, buf, 0, vacrelstats, &vmbuffer); + lazy_vacuum_page(onerel, blkno, buf, 0, 0, vacrelstats, &vmbuffer); vacuumed_pages++; has_dead_tuples = false; } @@ -1269,7 +1286,14 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats, * not to reset latestRemovedXid since we want that value to be * valid. 
*/ + for (i = 0; i < vacrelstats->num_dead_tuples_array; i++) + { + pfree(vacrelstats->dead_tuples[i]); + vacrelstats->dead_tuples[i] = NULL; + } + vacrelstats->num_dead_tuples_array = 0; vacrelstats->num_dead_tuples = 0; + vacrelstats->num_dead_tuples_per_array = 0; /* * Periodically do incremental FSM vacuuming to make newly-freed @@ -1529,39 +1553,43 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) int npages; PGRUsage ru0; Buffer vmbuffer = InvalidBuffer; - + int i; pg_rusage_init(&ru0); npages = 0; tupindex = 0; - while (tupindex < vacrelstats->num_dead_tuples) + + for (i = 0; i < vacrelstats->num_dead_tuples_array; i++) { - BlockNumber tblk; - Buffer buf; - Page page; - Size freespace; + while (tupindex < vacrelstats->num_dead_tuples_per_array) + { + BlockNumber tblk; + Buffer buf; + Page page; + Size freespace; - vacuum_delay_point(); + vacuum_delay_point(); - tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]); - buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL, - vac_strategy); - if (!ConditionalLockBufferForCleanup(buf)) - { - ReleaseBuffer(buf); - ++tupindex; - continue; - } - tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats, - &vmbuffer); + tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[i][tupindex]); + buf = ReadBufferExtended(onerel, MAIN_FORKNUM, tblk, RBM_NORMAL, + vac_strategy); + if (!ConditionalLockBufferForCleanup(buf)) + { + ReleaseBuffer(buf); + ++tupindex; + continue; + } + tupindex = lazy_vacuum_page(onerel, tblk, buf, i, tupindex, vacrelstats, + &vmbuffer); - /* Now that we've compacted the page, record its available space */ - page = BufferGetPage(buf); - freespace = PageGetHeapFreeSpace(page); + /* Now that we've compacted the page, record its available space */ + page = BufferGetPage(buf); + freespace = PageGetHeapFreeSpace(page); - UnlockReleaseBuffer(buf); - RecordPageWithFreeSpace(onerel, tblk, freespace); - npages++; + UnlockReleaseBuffer(buf); + 
RecordPageWithFreeSpace(onerel, tblk, freespace); + npages++; + } } if (BufferIsValid(vmbuffer)) @@ -1589,7 +1617,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats) */ static int lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, - int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer) + int arrindex, int tupindex, LVRelStats *vacrelstats, Buffer *vmbuffer) { Page page = BufferGetPage(buffer); OffsetNumber unused[MaxOffsetNumber]; @@ -1601,19 +1629,22 @@ lazy_vacuum_page(Relation onerel, BlockNumber blkno, Buffer buffer, START_CRIT_SECTION(); - for (; tupindex < vacrelstats->num_dead_tuples; tupindex++) + for (; arrindex < vacrelstats->num_dead_tuples_array; arrindex++) { - BlockNumber tblk; - OffsetNumber toff; - ItemId itemid; + for (; tupindex < vacrelstats->num_dead_tuples_per_array; tupindex++) + { + BlockNumber tblk; + OffsetNumber toff; + ItemId itemid; - tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]); - if (tblk != blkno) - break; /* past end of tuples for this block */ - toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[tupindex]); - itemid = PageGetItemId(page, toff); - ItemIdSetUnused(itemid); - unused[uncnt++] = toff; + tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[arrindex][tupindex]); + if (tblk != blkno) + break; /* past end of tuples for this block */ + toff = ItemPointerGetOffsetNumber(&vacrelstats->dead_tuples[arrindex][tupindex]); + itemid = PageGetItemId(page, toff); + ItemIdSetUnused(itemid); + unused[uncnt++] = toff; + } } PageRepairFragmentation(page); @@ -2141,11 +2172,10 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats) static void lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks) { - long maxtuples; - int vac_work_mem = IsAutoVacuumWorkerProcess() && + long maxtuples; + int vac_work_mem = IsAutoVacuumWorkerProcess() && autovacuum_work_mem != -1 ? 
autovacuum_work_mem : maintenance_work_mem; - if (vacrelstats->useindex) { maxtuples = (vac_work_mem * 1024L) / sizeof(ItemPointerData); @@ -2164,10 +2194,15 @@ lazy_space_alloc(LVRelStats *vacrelstats, BlockNumber relblocks) maxtuples = MaxHeapTuplesPerPage; } + vacrelstats->num_dead_tuples_array = 0; vacrelstats->num_dead_tuples = 0; - vacrelstats->max_dead_tuples = (int) maxtuples; - vacrelstats->dead_tuples = (ItemPointer) - palloc(maxtuples * sizeof(ItemPointerData)); + vacrelstats->num_dead_tuples_per_array = 0; + vacrelstats->max_dead_tuples_per_array = ceil(maxtuples / MAX_ARRAY_COUNT); + + vacrelstats->max_dead_tuples = maxtuples; + + if (!vacrelstats->dead_tuples) + vacrelstats->dead_tuples = palloc(MAX_ARRAY_COUNT * sizeof(ItemPointer)); } /* @@ -2182,13 +2217,29 @@ lazy_record_dead_tuple(LVRelStats *vacrelstats, * could if we are given a really small maintenance_work_mem. In that * case, just forget the last few tuples (we'll get 'em next time). */ - if (vacrelstats->num_dead_tuples < vacrelstats->max_dead_tuples) + if (vacrelstats->num_dead_tuples > vacrelstats->max_dead_tuples) + return; + + if (vacrelstats->num_dead_tuples_array >= MAX_ARRAY_COUNT) + return; + + if ((vacrelstats->num_dead_tuples % vacrelstats->max_dead_tuples_per_array) == 0) { - vacrelstats->dead_tuples[vacrelstats->num_dead_tuples] = *itemptr; + vacrelstats->num_dead_tuples_per_array = 0; + vacrelstats->dead_tuples[vacrelstats->num_dead_tuples_array] = + (ItemPointer) palloc(vacrelstats->max_dead_tuples_per_array * sizeof(ItemPointerData)); + + vacrelstats->dead_tuples[vacrelstats->num_dead_tuples_array][vacrelstats->num_dead_tuples_per_array++] = *itemptr; vacrelstats->num_dead_tuples++; - pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES, - vacrelstats->num_dead_tuples); + vacrelstats->num_dead_tuples_array++; } + else + { + vacrelstats->dead_tuples[vacrelstats->num_dead_tuples_array - 1][vacrelstats->num_dead_tuples_per_array++] = *itemptr; + 
vacrelstats->num_dead_tuples++; + } + pgstat_progress_update_param(PROGRESS_VACUUM_NUM_DEAD_TUPLES, + vacrelstats->num_dead_tuples); } /* @@ -2203,14 +2254,49 @@ lazy_tid_reaped(ItemPointer itemptr, void *state) { LVRelStats *vacrelstats = (LVRelStats *) state; ItemPointer res; + res = (ItemPointer) pg_bsearch(itemptr, + (void**) vacrelstats->dead_tuples, + vacrelstats->num_dead_tuples, + vacrelstats->max_dead_tuples_per_array, + vac_cmp_itemptr); + return (res != NULL); +} + +/* + * pg_bsearch() -- binary search algorithm for a two-dimensional array. + */ +void * +pg_bsearch(ItemPointer key, void **base, int num, int li, + int (*cmp)(const void *key, const void *elt)) +{ + ItemPointer pivot; + ItemPointer *p = (ItemPointer*)base; + int result; + int n = 0; + int ind = 0; + int i,j,v; + + while (num > 0) + { + ind = num >> 1; - res = (ItemPointer) bsearch((void *) itemptr, - (void *) vacrelstats->dead_tuples, - vacrelstats->num_dead_tuples, - sizeof(ItemPointerData), - vac_cmp_itemptr); + v = ind + n; + i = v / li; + j = v % li; - return (res != NULL); + pivot = &p[i][j]; + result = cmp(key, pivot); + if (result == 0) + return pivot; + + if (result > 0) + { + n = ind + n + 1; + num--; + } + num >>= 1; + } + return NULL; } /*