This patch adds balance_dirty_pages_ub(), which implements the per-UB logic of balance_dirty_pages() from PCS6. balance_dirty_pages_ub() is invoked right before the original balance_dirty_pages() to issue per-UB writeback. After that, balance_dirty_pages() is executed for global writeback.
https://jira.sw.ru/browse/PSBM-33841 Signed-off-by: Andrey Ryabinin <[email protected]> --- mm/page-writeback.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 429c759..951330e 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1391,6 +1391,101 @@ static inline void bdi_dirty_limits(struct backing_dev_info *bdi, } } +static void balance_dirty_pages_ub(struct address_space *mapping, + unsigned long write_chunk) +{ + long ub_dirty, ub_writeback; + long ub_thresh, ub_background_thresh; + unsigned long pages_written = 0; + unsigned long pause = 1; + struct user_beancounter *ub = get_io_ub(); + + struct backing_dev_info *bdi = mapping->backing_dev_info; + + for (;;) { + ub_dirty = ub_stat_get(ub, dirty_pages); + ub_writeback = ub_stat_get(ub, writeback_pages); + + if (!ub_dirty_limits(&ub_background_thresh, &ub_thresh, ub)) + break; + + /* + * Check thresholds, set dirty_exceeded flags and + * start background writeback before throttling. + */ + if (ub_dirty + ub_writeback <= ub_thresh) + break; + if (!writeback_in_progress(bdi)) + bdi_start_background_writeback(bdi); + + /* + * Throttle it only when the background writeback cannot + * catch-up. This avoids (excessively) small writeouts + * when the bdi limits are ramping up. 
+ */ + if (ub_dirty + ub_writeback < + (ub_background_thresh + ub_thresh) / 2) + break; + + if (ub_dirty > ub_thresh) { + pages_written = writeback_inodes_wb(&bdi->wb, write_chunk, + WB_REASON_BACKGROUND, ub); + ub_dirty = ub_stat_get(ub, dirty_pages); + ub_writeback = ub_stat_get(ub, writeback_pages); + } + + /* fixup ub-stat per-cpu drift to avoid false-positive */ + if (ub_dirty + ub_writeback > ub_thresh && + ub_dirty + ub_writeback - ub_thresh < + UB_STAT_BATCH * num_possible_cpus()) { + ub_dirty = ub_stat_get_exact(ub, dirty_pages); + ub_writeback = ub_stat_get_exact(ub, writeback_pages); + } + + if (ub_dirty + ub_writeback <= ub_thresh) + break; + + if (pages_written >= write_chunk) + break; /* We've done our duty */ + + __set_current_state(TASK_KILLABLE); + io_schedule_timeout(pause); + + /* + * Increase the delay for each loop, up to our previous + * default of taking a 100ms nap. + */ + pause <<= 1; + if (pause > HZ / 10) + pause = HZ / 10; + + if (fatal_signal_pending(current)) + break; + } + + virtinfo_notifier_call(VITYPE_IO, VIRTINFO_IO_BALANCE_DIRTY, + (void*)write_chunk); + + /* + * Even if this is filtered writeback for other ub it will write + * inodes for this ub, because ub->dirty_exceeded is set. + */ + if (writeback_in_progress(bdi)) + return; + + /* + * In laptop mode, we wait until hitting the higher threshold before + * starting background writeout, and then write out all the way down + * to the lower threshold. So slow writers cause minimal disk activity. + * + * In normal mode, we start background writeout at the lower + * background_thresh, to keep the amount of dirty memory low. + */ + if ((laptop_mode && pages_written) || + (!laptop_mode && ub_dirty > ub_background_thresh)) + bdi_start_background_writeback(bdi); +} + /* * balance_dirty_pages() must be called by processes which are generating dirty * data. 
It looks at the number of dirty pages in the machine and will force @@ -1690,8 +1785,10 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping) } preempt_enable(); - if (unlikely(current->nr_dirtied >= ratelimit)) + if (unlikely(current->nr_dirtied >= ratelimit)) { + balance_dirty_pages_ub(mapping, current->nr_dirtied); balance_dirty_pages(mapping, current->nr_dirtied); + } } EXPORT_SYMBOL(balance_dirty_pages_ratelimited); -- 2.4.10 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
