[PATCH 05/19] writeback: move global_dirty_limit into wb_domain

2015-05-22 Thread Tejun Heo
This patch is a part of the series to define wb_domain which
represents a domain that wb's (bdi_writeback's) belong to and are
measured against each other in.  This will enable IO backpressure
propagation for cgroup writeback.

global_dirty_limit exists to regulate the global dirty threshold which
is a property of the wb_domain.  This patch moves hard_dirty_limit,
dirty_lock, and update_time into wb_domain.

This is pure reorganization and doesn't introduce any behavioral
changes.

Signed-off-by: Tejun Heo 
Cc: Jens Axboe 
Cc: Jan Kara 
Cc: Wu Fengguang 
Cc: Greg Thelen 
---
 fs/fs-writeback.c                |  2 +-
 include/linux/writeback.h        | 17 ++-
 include/trace/events/writeback.h |  7 +++---
 mm/page-writeback.c              | 46 
 4 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index cd89484..51c8a5b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -887,7 +887,7 @@ static long writeback_chunk_size(struct bdi_writeback *wb,
pages = LONG_MAX;
else {
pages = min(wb->avg_write_bandwidth / 2,
-   global_dirty_limit / DIRTY_SCOPE);
+   global_wb_domain.dirty_limit / DIRTY_SCOPE);
pages = min(pages, work->nr_pages);
pages = round_down(pages + MIN_WRITEBACK_PAGES,
   MIN_WRITEBACK_PAGES);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 3148db1..5fdd4e1 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -95,6 +95,8 @@ struct writeback_control {
  * dirtyable memory accordingly.
  */
 struct wb_domain {
+   spinlock_t lock;
+
/*
 * Scale the writeback cache size proportional to the relative
 * writeout speed.
@@ -115,6 +117,19 @@ struct wb_domain {
struct fprop_global completions;
struct timer_list period_timer; /* timer for aging of completions */
unsigned long period_time;
+
+   /*
+* The dirtyable memory and dirty threshold could be suddenly
+* knocked down by a large amount (eg. on the startup of KVM in a
+* swapless system). This may throw the system into deep dirty
+* exceeded state and throttle heavy/light dirtiers alike. To
+* retain good responsiveness, maintain global_dirty_limit for
+* tracking slowly down to the knocked down dirty threshold.
+*
+* Both fields are protected by ->lock.
+*/
+   unsigned long dirty_limit_tstamp;
+   unsigned long dirty_limit;
 };
 
 /*
@@ -153,7 +168,7 @@ void throttle_vm_writeout(gfp_t gfp_mask);
 bool zone_dirty_ok(struct zone *zone);
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
 
-extern unsigned long global_dirty_limit;
+extern struct wb_domain global_wb_domain;
 
 /* These are exported to sysctl. */
 extern int dirty_background_ratio;
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 9b876f6..bec6999 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -361,7 +361,7 @@ TRACE_EVENT(global_dirty_state,
__entry->nr_written = global_page_state(NR_WRITTEN);
__entry->background_thresh = background_thresh;
__entry->dirty_thresh   = dirty_thresh;
-   __entry->dirty_limit = global_dirty_limit;
+   __entry->dirty_limit= global_wb_domain.dirty_limit;
),
 
TP_printk("dirty=%lu writeback=%lu unstable=%lu "
@@ -463,8 +463,9 @@ TRACE_EVENT(balance_dirty_pages,
unsigned long freerun = (thresh + bg_thresh) / 2;
strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
 
-   __entry->limit  = global_dirty_limit;
-   __entry->setpoint   = (global_dirty_limit + freerun) / 2;
+   __entry->limit  = global_wb_domain.dirty_limit;
+   __entry->setpoint   = (global_wb_domain.dirty_limit +
+   freerun) / 2;
__entry->dirty  = dirty;
__entry->bdi_setpoint   = __entry->setpoint *
bdi_thresh / (thresh + 1);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 08e1737..27e60ba 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -122,9 +122,7 @@ EXPORT_SYMBOL(laptop_mode);
 
 /* End of sysctl-exported parameters */
 
-unsigned long global_dirty_limit;
-
-static struct wb_domain global_wb_domain;
+struct wb_domain global_wb_domain;
 
 /*
  * Length of period for aging writeout fractions of bdis. This is an
@@ -470,9 +468,15 @@ static void writeout_period(unsigned long t)
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
 {
memset(dom, 0, sizeof(*dom));
+
+   spin_lock_init(&dom->lock);
+

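The comment added to struct wb_domain above is the heart of the change: dirty_limit follows the computed dirty threshold, but when the threshold is suddenly knocked down it is only tracked down slowly. The snippet below is a minimal, self-contained sketch of that idea, not the kernel's actual update logic; the struct and field names mirror the hunk above, while the helper name, the >>5 decay step and the clamp against the number of dirty pages are assumptions made for illustration.

struct wb_domain_sketch {
	unsigned long dirty_limit;		/* slowly tracked hard limit */
	unsigned long dirty_limit_tstamp;	/* last update time, e.g. in jiffies */
};

static void sketch_update_dirty_limit(struct wb_domain_sketch *dom,
				      unsigned long thresh,
				      unsigned long dirty)
{
	unsigned long limit = dom->dirty_limit;

	if (limit < thresh) {
		/* threshold rose: follow it in one step */
		dom->dirty_limit = thresh;
		return;
	}

	/*
	 * Threshold dropped (e.g. KVM start-up on a swapless system shrank
	 * the dirtyable memory): track it down gradually so heavy and light
	 * dirtiers are not all thrown into hard-limit territory at once.
	 * Clamping against the current dirty count keeps the limit above
	 * the pages that are already dirty (an assumption of this sketch).
	 */
	if (thresh < dirty)
		thresh = dirty;
	if (limit > thresh)
		dom->dirty_limit = limit - ((limit - thresh) >> 5);
}

As the fs-writeback.c hunk shows, readers of the limit simply switch from the old global_dirty_limit variable to global_wb_domain.dirty_limit; only the update side needs ->lock.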

[PATCH 05/19] writeback: move global_dirty_limit into wb_domain

2015-04-06 Thread Tejun Heo
This patch is a part of the series to define wb_domain which
represents a domain that wb's (bdi_writeback's) belong to and are
measured against each other in.  This will enable IO backpressure
propagation for cgroup writeback.

global_dirty_limit exists to regulate the global dirty threshold which
is a property of the wb_domain.  This patch moves hard_dirty_limit,
dirty_lock, and update_time into wb_domain.

This is pure reorganization and doesn't introduce any behavioral
changes.

Signed-off-by: Tejun Heo 
Cc: Jens Axboe 
Cc: Jan Kara 
Cc: Wu Fengguang 
Cc: Greg Thelen 
---
 fs/fs-writeback.c                |  2 +-
 include/linux/writeback.h        | 17 ++-
 include/trace/events/writeback.h |  7 +++---
 mm/page-writeback.c              | 46 
 4 files changed, 44 insertions(+), 28 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 85daae2..5b842bd 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -868,7 +868,7 @@ static long writeback_chunk_size(struct bdi_writeback *wb,
pages = LONG_MAX;
else {
pages = min(wb->avg_write_bandwidth / 2,
-   global_dirty_limit / DIRTY_SCOPE);
+   global_wb_domain.dirty_limit / DIRTY_SCOPE);
pages = min(pages, work->nr_pages);
pages = round_down(pages + MIN_WRITEBACK_PAGES,
   MIN_WRITEBACK_PAGES);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 4972dcf..fe0924a 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -95,6 +95,8 @@ struct writeback_control {
  * dirtyable memory accordingly.
  */
 struct wb_domain {
+   spinlock_t lock;
+
/*
 * Scale the writeback cache size proportional to the relative
 * writeout speed.
@@ -115,6 +117,19 @@ struct wb_domain {
struct fprop_global completions;
struct timer_list period_timer; /* timer for aging of completions */
unsigned long period_time;
+
+   /*
+* The dirtyable memory and dirty threshold could be suddenly
+* knocked down by a large amount (eg. on the startup of KVM in a
+* swapless system). This may throw the system into deep dirty
+* exceeded state and throttle heavy/light dirtiers alike. To
+* retain good responsiveness, maintain global_dirty_limit for
+* tracking slowly down to the knocked down dirty threshold.
+*
+* Both fields are protected by ->lock.
+*/
+   unsigned long dirty_limit_tstamp;
+   unsigned long dirty_limit;
 };
 
 /*
@@ -153,7 +168,7 @@ void throttle_vm_writeout(gfp_t gfp_mask);
 bool zone_dirty_ok(struct zone *zone);
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
 
-extern unsigned long global_dirty_limit;
+extern struct wb_domain global_wb_domain;
 
 /* These are exported to sysctl. */
 extern int dirty_background_ratio;
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index 5c9a68c..d5ac3dd 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -344,7 +344,7 @@ TRACE_EVENT(global_dirty_state,
__entry->nr_written = global_page_state(NR_WRITTEN);
__entry->background_thresh = background_thresh;
__entry->dirty_thresh   = dirty_thresh;
-   __entry->dirty_limit = global_dirty_limit;
+   __entry->dirty_limit= global_wb_domain.dirty_limit;
),
 
TP_printk("dirty=%lu writeback=%lu unstable=%lu "
@@ -446,8 +446,9 @@ TRACE_EVENT(balance_dirty_pages,
unsigned long freerun = (thresh + bg_thresh) / 2;
strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
 
-   __entry->limit  = global_dirty_limit;
-   __entry->setpoint   = (global_dirty_limit + freerun) / 2;
+   __entry->limit  = global_wb_domain.dirty_limit;
+   __entry->setpoint   = (global_wb_domain.dirty_limit +
+   freerun) / 2;
__entry->dirty  = dirty;
__entry->bdi_setpoint   = __entry->setpoint *
bdi_thresh / (thresh + 1);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 43380dc..c141533 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -122,9 +122,7 @@ EXPORT_SYMBOL(laptop_mode);
 
 /* End of sysctl-exported parameters */
 
-unsigned long global_dirty_limit;
-
-static struct wb_domain global_wb_domain;
+struct wb_domain global_wb_domain;
 
 /*
  * Length of period for aging writeout fractions of bdis. This is an
@@ -470,9 +468,15 @@ static void writeout_period(unsigned long t)
 int wb_domain_init(struct wb_domain *dom, gfp_t gfp)
 {
memset(dom, 0, sizeof(*dom));
+
+   spin_lock_init(&dom->lock);
+

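Both the 2015-05-22 and the 2015-04-06 postings are cut off by the archive in the middle of the wb_domain_init() hunk. Purely as a guide to what setting up the two new fields involves (a sketch under assumptions, not the missing remainder of the patch), initialization would look roughly like this:

#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/writeback.h>

/* Illustrative only: seed the fields this patch adds to struct wb_domain. */
static void sketch_init_dirty_limit_fields(struct wb_domain *dom)
{
	spin_lock_init(&dom->lock);		/* protects the two fields below */
	dom->dirty_limit_tstamp = jiffies;	/* "now" */
	dom->dirty_limit = 0;			/* raised to the threshold on first update */
}

The timestamp presumably lets the update path rate-limit how often dirty_limit is recomputed; the two fields are updated together, which is why the comment in the struct puts both under the same ->lock.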