[PATCH 03/18] writeback: implement wb_domain

2015-03-22 Thread Tejun Heo
Dirtyable memory is distributed to a wb (bdi_writeback) according to
the relative bandwidth the wb is writing out in the whole system.
This distribution is global - each wb is measured against all other
wb's and gets the proportinately sized portion of the memory in the
whole system.

For cgroup writeback, the amount of dirtyable memory is scoped by
memcg and thus each wb would need to be measured and controlled in its
memcg.  IOW, a wb will belong to two writeback domains - the global
and memcg domains.

Currently, what constitutes the global writeback domain are scattered
across a number of global states.  This patch starts collecting them
into struct wb_domain.

* fprop_global which serves as the basis for proportional bandwidth
  measurement and its period timer are moved into struct wb_domain.

* global_wb_domain hosts the states for the global domain.

* While at it, flatten wb_writeout_fraction() into its callers.  This
  thin wrapper doesn't provide any actual benefits while getting in
  the way.

This is pure reorganization and doesn't introduce any behavioral
changes.

Signed-off-by: Tejun Heo 
Cc: Jens Axboe 
Cc: Jan Kara 
Cc: Wu Fengguang 
Cc: Greg Thelen 
---
 include/linux/writeback.h | 32 +
 mm/page-writeback.c   | 72 ++-
 2 files changed, 59 insertions(+), 45 deletions(-)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 82e0e39..5af0a57e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -7,6 +7,7 @@
 #include 
 #include 
 #include 
+#include 
 
 DECLARE_PER_CPU(int, dirty_throttle_leaks);
 
@@ -87,6 +88,36 @@ struct writeback_control {
 };
 
 /*
+ * A wb_domain represents a domain that wb's (bdi_writeback's) belong to
+ * and are measured against each other in.  There always is one global
+ * domain, global_wb_domain, that every wb in the system is a member of.
+ * This allows measuring the relative bandwidth of each wb to distribute
+ * dirtyable memory accordingly.
+ */
+struct wb_domain {
+   /*
+* Scale the writeback cache size proportional to the relative
+* writeout speed.
+*
+* We do this by keeping a floating proportion between BDIs, based
+* on page writeback completions [end_page_writeback()]. Those
+* devices that write out pages fastest will get the larger share,
+* while the slower will get a smaller share.
+*
+* We use page writeout completions because we are interested in
+* getting rid of dirty pages. Having them written out is the
+* primary goal.
+*
+* We introduce a concept of time, a period over which we measure
+* these events, because demand can/will vary over time. The length
+* of this period itself is measured in page writeback completions.
+*/
+   struct fprop_global completions;
+   struct timer_list period_timer; /* timer for aging of completions */
+   unsigned long period_time;
+};
+
+/*
  * fs/fs-writeback.c
  */
 struct bdi_writeback;
@@ -120,6 +151,7 @@ static inline void laptop_sync_completion(void) { }
 #endif
 void throttle_vm_writeout(gfp_t gfp_mask);
 bool zone_dirty_ok(struct zone *zone);
+int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
 
 extern unsigned long global_dirty_limit;
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d9ebabe..3c6ccc7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -124,29 +124,7 @@ EXPORT_SYMBOL(laptop_mode);
 
 unsigned long global_dirty_limit;
 
-/*
- * Scale the writeback cache size proportional to the relative writeout speeds.
- *
- * We do this by keeping a floating proportion between BDIs, based on page
- * writeback completions [end_page_writeback()]. Those devices that write out
- * pages fastest will get the larger share, while the slower will get a smaller
- * share.
- *
- * We use page writeout completions because we are interested in getting rid of
- * dirty pages. Having them written out is the primary goal.
- *
- * We introduce a concept of time, a period over which we measure these events,
- * because demand can/will vary over time. The length of this period itself is
- * measured in page writeback completions.
- *
- */
-static struct fprop_global writeout_completions;
-
-static void writeout_period(unsigned long t);
-/* Timer for aging of writeout_completions */
-static struct timer_list writeout_period_timer =
-   TIMER_DEFERRED_INITIALIZER(writeout_period, 0, 0);
-static unsigned long writeout_period_time = 0;
+static struct wb_domain global_wb_domain;
 
 /*
  * Length of period for aging writeout fractions of bdis. This is an
@@ -433,24 +411,26 @@ static unsigned long wp_next_time(unsigned long cur_time)
 }
 
 /*
- * Increment the BDI's writeout completion count and the global writeout
+ * Increment the wb's writeout completion count and the global writeout
  * completion count. Called from 

[PATCH 03/18] writeback: implement wb_domain

2015-03-22 Thread Tejun Heo
Dirtyable memory is distributed to a wb (bdi_writeback) according to
the relative bandwidth the wb is writing out in the whole system.
This distribution is global - each wb is measured against all other
wb's and gets the proportinately sized portion of the memory in the
whole system.

For cgroup writeback, the amount of dirtyable memory is scoped by
memcg and thus each wb would need to be measured and controlled in its
memcg.  IOW, a wb will belong to two writeback domains - the global
and memcg domains.

Currently, what constitutes the global writeback domain are scattered
across a number of global states.  This patch starts collecting them
into struct wb_domain.

* fprop_global which serves as the basis for proportional bandwidth
  measurement and its period timer are moved into struct wb_domain.

* global_wb_domain hosts the states for the global domain.

* While at it, flatten wb_writeout_fraction() into its callers.  This
  thin wrapper doesn't provide any actual benefits while getting in
  the way.

This is pure reorganization and doesn't introduce any behavioral
changes.

Signed-off-by: Tejun Heo t...@kernel.org
Cc: Jens Axboe ax...@kernel.dk
Cc: Jan Kara j...@suse.cz
Cc: Wu Fengguang fengguang...@intel.com
Cc: Greg Thelen gthe...@google.com
---
 include/linux/writeback.h | 32 +
 mm/page-writeback.c   | 72 ++-
 2 files changed, 59 insertions(+), 45 deletions(-)

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 82e0e39..5af0a57e 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -7,6 +7,7 @@
 #include linux/sched.h
 #include linux/workqueue.h
 #include linux/fs.h
+#include linux/flex_proportions.h
 
 DECLARE_PER_CPU(int, dirty_throttle_leaks);
 
@@ -87,6 +88,36 @@ struct writeback_control {
 };
 
 /*
+ * A wb_domain represents a domain that wb's (bdi_writeback's) belong to
+ * and are measured against each other in.  There always is one global
+ * domain, global_wb_domain, that every wb in the system is a member of.
+ * This allows measuring the relative bandwidth of each wb to distribute
+ * dirtyable memory accordingly.
+ */
+struct wb_domain {
+   /*
+* Scale the writeback cache size proportional to the relative
+* writeout speed.
+*
+* We do this by keeping a floating proportion between BDIs, based
+* on page writeback completions [end_page_writeback()]. Those
+* devices that write out pages fastest will get the larger share,
+* while the slower will get a smaller share.
+*
+* We use page writeout completions because we are interested in
+* getting rid of dirty pages. Having them written out is the
+* primary goal.
+*
+* We introduce a concept of time, a period over which we measure
+* these events, because demand can/will vary over time. The length
+* of this period itself is measured in page writeback completions.
+*/
+   struct fprop_global completions;
+   struct timer_list period_timer; /* timer for aging of completions */
+   unsigned long period_time;
+};
+
+/*
  * fs/fs-writeback.c
  */
 struct bdi_writeback;
@@ -120,6 +151,7 @@ static inline void laptop_sync_completion(void) { }
 #endif
 void throttle_vm_writeout(gfp_t gfp_mask);
 bool zone_dirty_ok(struct zone *zone);
+int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
 
 extern unsigned long global_dirty_limit;
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index d9ebabe..3c6ccc7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -124,29 +124,7 @@ EXPORT_SYMBOL(laptop_mode);
 
 unsigned long global_dirty_limit;
 
-/*
- * Scale the writeback cache size proportional to the relative writeout speeds.
- *
- * We do this by keeping a floating proportion between BDIs, based on page
- * writeback completions [end_page_writeback()]. Those devices that write out
- * pages fastest will get the larger share, while the slower will get a smaller
- * share.
- *
- * We use page writeout completions because we are interested in getting rid of
- * dirty pages. Having them written out is the primary goal.
- *
- * We introduce a concept of time, a period over which we measure these events,
- * because demand can/will vary over time. The length of this period itself is
- * measured in page writeback completions.
- *
- */
-static struct fprop_global writeout_completions;
-
-static void writeout_period(unsigned long t);
-/* Timer for aging of writeout_completions */
-static struct timer_list writeout_period_timer =
-   TIMER_DEFERRED_INITIALIZER(writeout_period, 0, 0);
-static unsigned long writeout_period_time = 0;
+static struct wb_domain global_wb_domain;
 
 /*
  * Length of period for aging writeout fractions of bdis. This is an
@@ -433,24 +411,26 @@ static unsigned long wp_next_time(unsigned long cur_time)
 }
 
 /*
- * Increment the BDI's writeout completion