Re: [PATCH v8 03/10] md: Neither resync nor reshape while the system is frozen

2017-10-12 Thread Shaohua Li
On Thu, Oct 12, 2017 at 04:59:01PM +, Bart Van Assche wrote:
> On Wed, 2017-10-11 at 12:32 -0700, Shaohua Li wrote:
> > On Wed, Oct 11, 2017 at 05:17:56PM +, Bart Van Assche wrote:
> > > On Tue, 2017-10-10 at 18:48 -0700, Shaohua Li wrote:
> > > > The problem is __md_stop_writes set some bit like MD_RECOVERY_FROZEN, 
> > > > which
> > > > will prevent md_check_recovery restarting resync/reshape. I think we 
> > > > need
> > > > preserve mddev->recovery in suspend prepare and restore after resume
> > > 
> > > Hello Shaohua,
> > > 
> > > As far as I can see __md_stop_writes() sets MD_RECOVERY_FROZEN and can set
> > > MD_RECOVERY_INTR. Since md_check_recovery() clears MD_RECOVERY_INTR I 
> > > think
> > > it should be sufficient to save and restore the state of the
> > > MD_RECOVERY_FROZEN flag. However, when I ran the following test:
> > > * echo frozen > /sys/block/md0/md/sync_action
> > > * cat /sys/block/md0/md/sync_action
> > > * systemctl hibernate
> > > * (power on system again)
> > > * cat /sys/block/md0/md/sync_action
> > > 
> > > the output that appeared was as follows:
> > > 
> > > frozen
> > > idle 
> > > Does that mean that a hibernate or suspend followed by a resume is 
> > > sufficient
> > > to clear the MD_RECOVERY_FROZEN flag for the md drivers and hence that the
> > > patch at the start of this e-mail thread does not need any further
> > > modifications?
> > 
> > Have no idea why it shows 'idle'. From my understanding, after resume, 
> > previous
> > memory is stored and MD_RECOVERY_FROZEN bit should be set. Was trying to
> > understand what happens.
> 
> Hello Shaohua,
> 
> I think a user space action causes the MD_RECOVERY_FROZEN bit to be cleared.
> After I had added a few debug statements in the md driver this is what I
> found in the system log:
> 
> Oct 12 09:38:39 kernel: action_store: storing action check
> Oct 12 09:38:39 kernel: md: data-check of RAID array md0
> Oct 12 09:38:40 systemd[1]: Starting Hibernate...
> Oct 12 09:38:40 kernel: md: md0: data-check interrupted.
> [ powered on system manually ]
> Oct 12 09:39:05 kernel: action_store: storing action check
> Oct 12 09:39:05 kernel: md: data-check of RAID array md0
> Oct 12 09:39:11 kernel: action_store: storing action idle
> Oct 12 09:39:11 kernel: md: md0: data-check interrupted.
> 
> Anyway, how about the patch below? I have verified by adding a debug pr_info()
> statement that the newly added code really clears the MD_RECOVERY_FROZEN bit.

Ok, for patch 1-3:
Reviewed-by: Shaohua Li 

> Subject: [PATCH] md: Neither resync nor reshape while the system is frozen
> 
> Some people use the md driver on laptops and use the suspend and
> resume functionality. Since it is essential that submitting of
> new I/O requests stops before a hibernation image is created,
> interrupt the md resync and reshape actions if the system is
> being frozen. Note: the resync and reshape will restart after
> the system is resumed and a message similar to the following
> will appear in the system log:
> 
> md: md0: data-check interrupted.
> 
> Signed-off-by: Bart Van Assche 
> Cc: Shaohua Li 
> Cc: linux-r...@vger.kernel.org
> Cc: Ming Lei 
> Cc: Christoph Hellwig 
> Cc: Hannes Reinecke 
> Cc: Johannes Thumshirn 
> ---
>  drivers/md/md.c | 50 +-
>  drivers/md/md.h |  8 
>  2 files changed, 57 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index b99584e5d6b1..8b2fc750f97f 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -66,6 +66,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  #include 
>  #include "md.h"
> @@ -7439,6 +7441,7 @@ static int md_thread(void *arg)
>*/
>  
>   allow_signal(SIGKILL);
> + set_freezable();
>   while (!kthread_should_stop()) {
>  
>   /* We need to wait INTERRUPTIBLE so that
> @@ -7449,7 +7452,7 @@ static int md_thread(void *arg)
>   if (signal_pending(current))
>   flush_signals(current);
>  
> - wait_event_interruptible_timeout
> + wait_event_freezable_timeout
>   (thread->wqueue,
>test_bit(THREAD_WAKEUP, >flags)
>|| kthread_should_stop() || kthread_should_park(),
> @@ -8944,6 +8947,8 @@ static void md_stop_all_writes(void)
>   int need_delay = 0;
>  
>   for_each_mddev(mddev, tmp) {
> + mddev->frozen_before_suspend =
> + test_bit(MD_RECOVERY_FROZEN, >recovery);
>   if (mddev_trylock(mddev)) {
>   if (mddev->pers)
>   __md_stop_writes(mddev);
> @@ -8963,6 +8968,47 @@ static void md_stop_all_writes(void)
>   mdelay(1000*1);
>  }
>  
> +static void md_restore_frozen_flag(void)
> +{
> + struct 

Re: [PATCH v8 03/10] md: Neither resync nor reshape while the system is frozen

2017-10-12 Thread Bart Van Assche
On Wed, 2017-10-11 at 12:32 -0700, Shaohua Li wrote:
> On Wed, Oct 11, 2017 at 05:17:56PM +, Bart Van Assche wrote:
> > On Tue, 2017-10-10 at 18:48 -0700, Shaohua Li wrote:
> > > The problem is __md_stop_writes set some bit like MD_RECOVERY_FROZEN, 
> > > which
> > > will prevent md_check_recovery restarting resync/reshape. I think we need
> > > preserve mddev->recovery in suspend prepare and restore after resume
> > 
> > Hello Shaohua,
> > 
> > As far as I can see __md_stop_writes() sets MD_RECOVERY_FROZEN and can set
> > MD_RECOVERY_INTR. Since md_check_recovery() clears MD_RECOVERY_INTR I think
> > it should be sufficient to save and restore the state of the
> > MD_RECOVERY_FROZEN flag. However, when I ran the following test:
> > * echo frozen > /sys/block/md0/md/sync_action
> > * cat /sys/block/md0/md/sync_action
> > * systemctl hibernate
> > * (power on system again)
> > * cat /sys/block/md0/md/sync_action
> > 
> > the output that appeared was as follows:
> > 
> > frozen
> > idle 
> > Does that mean that a hibernate or suspend followed by a resume is 
> > sufficient
> > to clear the MD_RECOVERY_FROZEN flag for the md drivers and hence that the
> > patch at the start of this e-mail thread does not need any further
> > modifications?
> 
> Have no idea why it shows 'idle'. From my understanding, after resume, 
> previous
> memory is stored and MD_RECOVERY_FROZEN bit should be set. Was trying to
> understand what happens.

Hello Shaohua,

I think a user space action causes the MD_RECOVERY_FROZEN bit to be cleared.
After I had added a few debug statements in the md driver this is what I
found in the system log:

Oct 12 09:38:39 kernel: action_store: storing action check
Oct 12 09:38:39 kernel: md: data-check of RAID array md0
Oct 12 09:38:40 systemd[1]: Starting Hibernate...
Oct 12 09:38:40 kernel: md: md0: data-check interrupted.
[ powered on system manually ]
Oct 12 09:39:05 kernel: action_store: storing action check
Oct 12 09:39:05 kernel: md: data-check of RAID array md0
Oct 12 09:39:11 kernel: action_store: storing action idle
Oct 12 09:39:11 kernel: md: md0: data-check interrupted.

Anyway, how about the patch below? I have verified by adding a debug pr_info()
statement that the newly added code really clears the MD_RECOVERY_FROZEN bit.

Thanks,

Bart.


Subject: [PATCH] md: Neither resync nor reshape while the system is frozen

Some people use the md driver on laptops and use the suspend and
resume functionality. Since it is essential that submitting of
new I/O requests stops before a hibernation image is created,
interrupt the md resync and reshape actions if the system is
being frozen. Note: the resync and reshape will restart after
the system is resumed and a message similar to the following
will appear in the system log:

md: md0: data-check interrupted.

Signed-off-by: Bart Van Assche 
Cc: Shaohua Li 
Cc: linux-r...@vger.kernel.org
Cc: Ming Lei 
Cc: Christoph Hellwig 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
---
 drivers/md/md.c | 50 +-
 drivers/md/md.h |  8 
 2 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index b99584e5d6b1..8b2fc750f97f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -66,6 +66,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include "md.h"
@@ -7439,6 +7441,7 @@ static int md_thread(void *arg)
 */
 
allow_signal(SIGKILL);
+   set_freezable();
while (!kthread_should_stop()) {
 
/* We need to wait INTERRUPTIBLE so that
@@ -7449,7 +7452,7 @@ static int md_thread(void *arg)
if (signal_pending(current))
flush_signals(current);
 
-   wait_event_interruptible_timeout
+   wait_event_freezable_timeout
(thread->wqueue,
 test_bit(THREAD_WAKEUP, >flags)
 || kthread_should_stop() || kthread_should_park(),
@@ -8944,6 +8947,8 @@ static void md_stop_all_writes(void)
int need_delay = 0;
 
for_each_mddev(mddev, tmp) {
+   mddev->frozen_before_suspend =
+   test_bit(MD_RECOVERY_FROZEN, >recovery);
if (mddev_trylock(mddev)) {
if (mddev->pers)
__md_stop_writes(mddev);
@@ -8963,6 +8968,47 @@ static void md_stop_all_writes(void)
mdelay(1000*1);
 }
 
+static void md_restore_frozen_flag(void)
+{
+   struct list_head *tmp;
+   struct mddev *mddev;
+
+   for_each_mddev(mddev, tmp) {
+   if (mddev->frozen_before_suspend)
+   set_bit(MD_RECOVERY_FROZEN, >recovery);
+   else
+   clear_bit(MD_RECOVERY_FROZEN, >recovery);
+   }
+}
+
+/*
+ * Ensure that neither 

Re: [PATCH v8 03/10] md: Neither resync nor reshape while the system is frozen

2017-10-11 Thread Shaohua Li
On Wed, Oct 11, 2017 at 05:17:56PM +, Bart Van Assche wrote:
> On Tue, 2017-10-10 at 18:48 -0700, Shaohua Li wrote:
> > The problem is __md_stop_writes set some bit like MD_RECOVERY_FROZEN, which
> > will prevent md_check_recovery restarting resync/reshape. I think we need
> > preserve mddev->recovery in suspend prepare and restore after resume
> 
> Hello Shaohua,
> 
> As far as I can see __md_stop_writes() sets MD_RECOVERY_FROZEN and can set
> MD_RECOVERY_INTR. Since md_check_recovery() clears MD_RECOVERY_INTR I think
> it should be sufficient to save and restore the state of the
> MD_RECOVERY_FROZEN flag. However, when I ran the following test:
> * echo frozen > /sys/block/md0/md/sync_action
> * cat /sys/block/md0/md/sync_action
> * systemctl hibernate
> * (power on system again)
> * cat /sys/block/md0/md/sync_action
> 
> the output that appeared was as follows:
> 
> frozen
> idle 
> Does that mean that a hibernate or suspend followed by a resume is sufficient
> to clear the MD_RECOVERY_FROZEN flag for the md drivers and hence that the
> patch at the start of this e-mail thread does not need any further
> modifications?

Have no idea why it shows 'idle'. From my understanding, after resume, previous
memory is stored and MD_RECOVERY_FROZEN bit should be set. Was trying to
understand what happens.

Thanks,
Shaohua


Re: [PATCH v8 03/10] md: Neither resync nor reshape while the system is frozen

2017-10-11 Thread Bart Van Assche
On Tue, 2017-10-10 at 18:48 -0700, Shaohua Li wrote:
> The problem is __md_stop_writes set some bit like MD_RECOVERY_FROZEN, which
> will prevent md_check_recovery restarting resync/reshape. I think we need
> preserve mddev->recovery in suspend prepare and restore after resume

Hello Shaohua,

As far as I can see __md_stop_writes() sets MD_RECOVERY_FROZEN and can set
MD_RECOVERY_INTR. Since md_check_recovery() clears MD_RECOVERY_INTR I think
it should be sufficient to save and restore the state of the
MD_RECOVERY_FROZEN flag. However, when I ran the following test:
* echo frozen > /sys/block/md0/md/sync_action
* cat /sys/block/md0/md/sync_action
* systemctl hibernate
* (power on system again)
* cat /sys/block/md0/md/sync_action

the output that appeared was as follows:

frozen
idle

Does that mean that a hibernate or suspend followed by a resume is sufficient
to clear the MD_RECOVERY_FROZEN flag for the md drivers and hence that the
patch at the start of this e-mail thread does not need any further
modifications?

Thanks,

Bart.

Re: [PATCH v8 03/10] md: Neither resync nor reshape while the system is frozen

2017-10-10 Thread Shaohua Li
On Tue, Oct 10, 2017 at 11:33:06PM +, Bart Van Assche wrote:
> On Tue, 2017-10-10 at 15:30 -0700, Shaohua Li wrote:
> > On Tue, Oct 10, 2017 at 02:03:39PM -0700, Bart Van Assche wrote:
> > > Some people use the md driver on laptops and use the suspend and
> > > resume functionality. Since it is essential that submitting of
> > > new I/O requests stops before a hibernation image is created,
> > > interrupt the md resync and reshape actions if the system is
> > > being frozen. Note: the resync and reshape will restart after
> > > the system is resumed and a message similar to the following
> > > will appear in the system log:
> > > 
> > > md: md0: data-check interrupted.
> > 
> > Where do we restart resync and reshape?
> 
> Hello Shaohua,
> 
> My understanding of the md driver is as follows:
> - Before any write occurs, md_write_start() is called. That function clears
>   mddev->safemode and checks mddev->in_sync. If that variable is set, it is
>   cleared and md_write_start() wakes up mddev->thread and waits until the
>   superblock has been written to disk.
> - All mddev->thread implementations call md_check_recovery(). That function
>   updates the superblock by calling md_update_sb() if needed.
>   md_check_recovery() also starts a resync thread if the array is not in
>   sync. See also the comment block above md_check_recovery().
> - md_do_sync() performs the resync. If it completes successfully the
>   "in_sync" state is set (set_in_sync()). If it is interrupted the "in_sync"
>   state is not modified.
> - super_90_sync() sets the MD_SB_CLEAN flag in the on-disk superblock if the
>   "in_sync" flag is set.
> 
> In other words: if the array is in sync the MD_SB_CLEAN flag is set in the
> on-disk superblock. If a resync is needed that flag is not set. The
> MD_SB_CLEAN flag is checked when the superblock is loaded. If that flag is
> not set a resync is started.

The problem is __md_stop_writes set some bit like MD_RECOVERY_FROZEN, which
will prevent md_check_recovery restarting resync/reshape. I think we need
preserve mddev->recovery in suspend prepare and restore after resume

Thanks,
Shaohua


Re: [PATCH v8 03/10] md: Neither resync nor reshape while the system is frozen

2017-10-10 Thread Bart Van Assche
On Tue, 2017-10-10 at 15:30 -0700, Shaohua Li wrote:
> On Tue, Oct 10, 2017 at 02:03:39PM -0700, Bart Van Assche wrote:
> > Some people use the md driver on laptops and use the suspend and
> > resume functionality. Since it is essential that submitting of
> > new I/O requests stops before a hibernation image is created,
> > interrupt the md resync and reshape actions if the system is
> > being frozen. Note: the resync and reshape will restart after
> > the system is resumed and a message similar to the following
> > will appear in the system log:
> > 
> > md: md0: data-check interrupted.
> 
> Where do we restart resync and reshape?

Hello Shaohua,

My understanding of the md driver is as follows:
- Before any write occurs, md_write_start() is called. That function clears
  mddev->safemode and checks mddev->in_sync. If that variable is set, it is
  cleared and md_write_start() wakes up mddev->thread and waits until the
  superblock has been written to disk.
- All mddev->thread implementations call md_check_recovery(). That function
  updates the superblock by calling md_update_sb() if needed.
  md_check_recovery() also starts a resync thread if the array is not in
  sync. See also the comment block above md_check_recovery().
- md_do_sync() performs the resync. If it completes successfully the
  "in_sync" state is set (set_in_sync()). If it is interrupted the "in_sync"
  state is not modified.
- super_90_sync() sets the MD_SB_CLEAN flag in the on-disk superblock if the
  "in_sync" flag is set.

In other words: if the array is in sync the MD_SB_CLEAN flag is set in the
on-disk superblock. If a resync is needed that flag is not set. The
MD_SB_CLEAN flag is checked when the superblock is loaded. If that flag is
not set a resync is started.

Bart.


Re: [PATCH v8 03/10] md: Neither resync nor reshape while the system is frozen

2017-10-10 Thread Shaohua Li
On Tue, Oct 10, 2017 at 02:03:39PM -0700, Bart Van Assche wrote:
> Some people use the md driver on laptops and use the suspend and
> resume functionality. Since it is essential that submitting of
> new I/O requests stops before a hibernation image is created,
> interrupt the md resync and reshape actions if the system is
> being frozen. Note: the resync and reshape will restart after
> the system is resumed and a message similar to the following
> will appear in the system log:
> 
> md: md0: data-check interrupted.

Where do we restart resync and reshape?
 
> Signed-off-by: Bart Van Assche 
> Cc: Shaohua Li 
> Cc: linux-r...@vger.kernel.org
> Cc: Ming Lei 
> Cc: Christoph Hellwig 
> Cc: Hannes Reinecke 
> Cc: Johannes Thumshirn 
> ---
>  drivers/md/md.c | 30 +-
>  1 file changed, 29 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/md/md.c b/drivers/md/md.c
> index b99584e5d6b1..d712d3320c1d 100644
> --- a/drivers/md/md.c
> +++ b/drivers/md/md.c
> @@ -66,6 +66,8 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#include 
>  
>  #include 
>  #include "md.h"
> @@ -7439,6 +7441,7 @@ static int md_thread(void *arg)
>*/
>  
>   allow_signal(SIGKILL);
> + set_freezable();
>   while (!kthread_should_stop()) {
>  
>   /* We need to wait INTERRUPTIBLE so that
> @@ -7449,7 +7452,7 @@ static int md_thread(void *arg)
>   if (signal_pending(current))
>   flush_signals(current);
>  
> - wait_event_interruptible_timeout
> + wait_event_freezable_timeout
>   (thread->wqueue,
>test_bit(THREAD_WAKEUP, >flags)
>|| kthread_should_stop() || kthread_should_park(),
> @@ -8963,6 +8966,29 @@ static void md_stop_all_writes(void)
>   mdelay(1000*1);
>  }
>  
> +/*
> + * Ensure that neither resyncing nor reshaping occurs while the system is
> + * frozen.
> + */
> +static int md_notify_pm(struct notifier_block *bl, unsigned long state,
> + void *unused)
> +{
> + pr_debug("%s: state = %ld\n", __func__, state);
> +
> + switch (state) {
> + case PM_HIBERNATION_PREPARE:
> + case PM_SUSPEND_PREPARE:
> + case PM_RESTORE_PREPARE:
> + md_stop_all_writes();
> + break;
> + }
> + return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block md_pm_notifier = {
> + .notifier_call  = md_notify_pm,
> +};
> +
>  static int md_notify_reboot(struct notifier_block *this,
>   unsigned long code, void *x)
>  {
> @@ -9009,6 +9035,7 @@ static int __init md_init(void)
>   md_probe, NULL, NULL);
>  
>   register_reboot_notifier(_reboot_notifier);
> + register_pm_notifier(_pm_notifier);
>   raid_table_header = register_sysctl_table(raid_root_table);
>  
>   md_geninit();
> @@ -9248,6 +9275,7 @@ static __exit void md_exit(void)
>  
>   unregister_blkdev(MD_MAJOR,"md");
>   unregister_blkdev(mdp_major, "mdp");
> + unregister_pm_notifier(_pm_notifier);
>   unregister_reboot_notifier(_reboot_notifier);
>   unregister_sysctl_table(raid_table_header);
>  
> -- 
> 2.14.2
> 


[PATCH v8 03/10] md: Neither resync nor reshape while the system is frozen

2017-10-10 Thread Bart Van Assche
Some people use the md driver on laptops and use the suspend and
resume functionality. Since it is essential that submitting of
new I/O requests stops before a hibernation image is created,
interrupt the md resync and reshape actions if the system is
being frozen. Note: the resync and reshape will restart after
the system is resumed and a message similar to the following
will appear in the system log:

md: md0: data-check interrupted.

Signed-off-by: Bart Van Assche 
Cc: Shaohua Li 
Cc: linux-r...@vger.kernel.org
Cc: Ming Lei 
Cc: Christoph Hellwig 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
---
 drivers/md/md.c | 30 +-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index b99584e5d6b1..d712d3320c1d 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -66,6 +66,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include "md.h"
@@ -7439,6 +7441,7 @@ static int md_thread(void *arg)
 */
 
allow_signal(SIGKILL);
+   set_freezable();
while (!kthread_should_stop()) {
 
/* We need to wait INTERRUPTIBLE so that
@@ -7449,7 +7452,7 @@ static int md_thread(void *arg)
if (signal_pending(current))
flush_signals(current);
 
-   wait_event_interruptible_timeout
+   wait_event_freezable_timeout
(thread->wqueue,
 test_bit(THREAD_WAKEUP, >flags)
 || kthread_should_stop() || kthread_should_park(),
@@ -8963,6 +8966,29 @@ static void md_stop_all_writes(void)
mdelay(1000*1);
 }
 
+/*
+ * Ensure that neither resyncing nor reshaping occurs while the system is
+ * frozen.
+ */
+static int md_notify_pm(struct notifier_block *bl, unsigned long state,
+   void *unused)
+{
+   pr_debug("%s: state = %ld\n", __func__, state);
+
+   switch (state) {
+   case PM_HIBERNATION_PREPARE:
+   case PM_SUSPEND_PREPARE:
+   case PM_RESTORE_PREPARE:
+   md_stop_all_writes();
+   break;
+   }
+   return NOTIFY_DONE;
+}
+
+static struct notifier_block md_pm_notifier = {
+   .notifier_call  = md_notify_pm,
+};
+
 static int md_notify_reboot(struct notifier_block *this,
unsigned long code, void *x)
 {
@@ -9009,6 +9035,7 @@ static int __init md_init(void)
md_probe, NULL, NULL);
 
register_reboot_notifier(_reboot_notifier);
+   register_pm_notifier(_pm_notifier);
raid_table_header = register_sysctl_table(raid_root_table);
 
md_geninit();
@@ -9248,6 +9275,7 @@ static __exit void md_exit(void)
 
unregister_blkdev(MD_MAJOR,"md");
unregister_blkdev(mdp_major, "mdp");
+   unregister_pm_notifier(_pm_notifier);
unregister_reboot_notifier(_reboot_notifier);
unregister_sysctl_table(raid_table_header);
 
-- 
2.14.2