This patch adds balance_dirty_pages_ub(), which implements the per-UB logic of balance_dirty_pages() from PCS6. balance_dirty_pages_ub() is invoked right before the original balance_dirty_pages() to issue per-UB writeback. After that, balance_dirty_pages() is executed for global writeback.
https://jira.sw.ru/browse/PSBM-33841 Signed-off-by: Andrey Ryabinin <[email protected]> --- mm/page-writeback.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 121 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 429c759..7b54682 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1394,6 +1394,124 @@ static inline void bdi_dirty_limits(struct backing_dev_info *bdi, /* * balance_dirty_pages() must be called by processes which are generating dirty * data. It looks at the number of dirty pages in the machine and will force + * the caller to perform writeback if the system is over `vm_dirty_ratio'. + * If we're over `background_thresh' then the writeback threads are woken to + * perform some writeout. + */ +static void balance_dirty_pages_ub(struct address_space *mapping, + unsigned long write_chunk) +{ + long ub_dirty, ub_writeback; + long ub_thresh, ub_background_thresh; + unsigned long pages_written = 0; + unsigned long pause = 1; + struct user_beancounter *ub = get_io_ub(); + + struct backing_dev_info *bdi = mapping->backing_dev_info; + + for (;;) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_NONE, + .nr_to_write = write_chunk, + .range_cyclic = 1, + }; + + if (ub_dirty_limits(&ub_background_thresh, &ub_thresh, ub)) { + ub_dirty = ub_stat_get(ub, dirty_pages); + ub_writeback = ub_stat_get(ub, writeback_pages); + } else { + ub_dirty = ub_writeback = 0; + ub_thresh = ub_background_thresh = LONG_MAX / 2; + } + + /* + * Check thresholds, set dirty_exceeded flags and + * start background writeback before throttling. + */ + if (ub_dirty + ub_writeback <= ub_thresh) + break; + if (!test_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags)) + set_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags); + if (!writeback_in_progress(bdi)) + bdi_start_background_writeback(bdi); + + /* + * Throttle it only when the background writeback cannot + * catch-up. 
This avoids (excessively) small writeouts + * when the bdi limits are ramping up. + */ + if (bdi_cap_account_writeback(bdi) && + ub_dirty + ub_writeback < + (ub_background_thresh + ub_thresh) / 2) + break; + + if (ub_dirty > ub_thresh) { + writeback_inodes_wb(&bdi->wb, wbc.nr_to_write, + WB_REASON_BACKGROUND, ub); + pages_written += write_chunk - wbc.nr_to_write; + ub_dirty = ub_stat_get(ub, dirty_pages); + ub_writeback = ub_stat_get(ub, writeback_pages); + } + + /* fixup ub-stat per-cpu drift to avoid false-positive */ + if (ub_dirty + ub_writeback > ub_thresh && + ub_dirty + ub_writeback - ub_thresh < + UB_STAT_BATCH * num_possible_cpus()) { + ub_dirty = ub_stat_get_exact(ub, dirty_pages); + ub_writeback = ub_stat_get_exact(ub, writeback_pages); + } + + if (ub_dirty + ub_writeback <= ub_thresh) + break; + + if (pages_written >= write_chunk) + break; /* We've done our duty */ + + __set_current_state(TASK_KILLABLE); + io_schedule_timeout(pause); + + /* + * Increase the delay for each loop, up to our previous + * default of taking a 100ms nap. + */ + pause <<= 1; + if (pause > HZ / 10) + pause = HZ / 10; + + if (fatal_signal_pending(current)) + break; + } + + if (ub_dirty + ub_writeback < ub_thresh && + test_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags)) + clear_bit(UB_DIRTY_EXCEEDED, &ub->ub_flags); + + virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_BALANCE_DIRTY, + (void*)write_chunk); + + /* + * Even if this is filtered writeback for other ub it will write + * inodes for this ub, because ub->dirty_exceeded is set. + */ + if (writeback_in_progress(bdi)) + return; + + /* + * In laptop mode, we wait until hitting the higher threshold before + * starting background writeout, and then write out all the way down + * to the lower threshold. So slow writers cause minimal disk activity. + * + * In normal mode, we start background writeout at the lower + * background_thresh, to keep the amount of dirty memory low. 
+ */ + if ((laptop_mode && pages_written) || + (!laptop_mode && ub_dirty > ub_background_thresh)) + bdi_start_background_writeback(bdi); +} + +/* + * balance_dirty_pages() must be called by processes which are generating dirty + * data. It looks at the number of dirty pages in the machine and will force * the caller to wait once crossing the (background_thresh + dirty_thresh) / 2. * If we're over `background_thresh' then the writeback threads are woken to * perform some writeout. @@ -1690,8 +1808,10 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping) } preempt_enable(); - if (unlikely(current->nr_dirtied >= ratelimit)) + if (unlikely(current->nr_dirtied >= ratelimit)) { + balance_dirty_pages_ub(mapping, ratelimit); balance_dirty_pages(mapping, current->nr_dirtied); + } } EXPORT_SYMBOL(balance_dirty_pages_ratelimited); -- 2.4.10 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
