>> The reason the deadlock is that:
>>   Task                                       Btrfs-cleaner
>>   umount()
>>     down_write(&s->s_umount)
>>     sync_filesystem()
>>                                      do auto-defragment and produce
>>                                      lots of dirty pages
>>     close_ctree()
>>       wait for the end of
>>       btrfs-cleaner
> 
> why it's needed to wait for cleaner during close_ctree? I got bitten

> yesterday by (a non-deadlock) scenario, when there were tons of deleted

> snapshots, filesystem almost full, so getting and managing free space
> was slow (btrfs-cache thread was more active than btrfs-cleaner), and
> umount just did not end. interruptible by reboot only.

> 

> avoiding this particular case of waiting for cleaner:


Your patch doesn't fix the problem.
  Task                                  Btrfs-cleaner
  umount()
    down_write(&s->s_umount)
    sync_filesystem()
                                        do auto-defragment for some files
                                        and produce lots of dirty pages.
                                        do auto-defragment for the left files
                                          start_transaction
                                            reserve space
                                              shrink_delalloc()
                                                writeback_inodes_sb_nr_if_idle()
                                                  down_read(&sb->s_umount)
    close_ctree()
      stop_kthread()
        wait for the end of
        btrfs-cleaner

the deadlock still happens.

But I found you add a trylock for ->s_umount in cleaner_kthread(), this method
can fix the deadlock problem, I think. It may be introduced by the other patch,
could you send that patch to me? I found if we fail to trylock ->cleaner_mutex,
we will forget to unlock ->s_umount.

Thanks
Miao

> 
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -3064,7 +3066,13 @@ int btrfs_commit_super(struct btrfs_root *root)
> 
>         mutex_lock(&root->fs_info->cleaner_mutex);
>         btrfs_run_delayed_iputs(root);
> -       btrfs_clean_old_snapshots(root);
> +       if (!btrfs_fs_closing(root->fs_info)) {
> +               /* btrfs_clean_old_snapshots(root); */
> +               wake_up_process(root->fs_info->cleaner_kthread);
> +               printk(KERN_DEBUG "btrfs: wake cleaner from commit_super\n");
> +       } else {
> +               printk(KERN_DEBUG "btrfs: skip cleaning when going down\n");
> +       }
>         mutex_unlock(&root->fs_info->cleaner_mutex);
> 
>         /* wait until ongoing cleanup work done */
> 
> 
> plus not even trying to call the cleaner directly, rather waking the cleaner
> thread. (attached whole work-in-progress patch).
> 
> david
> ---
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 58a232d..0651f6f 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -1617,13 +1617,14 @@ static int cleaner_kthread(void *arg)
>       struct btrfs_root *root = arg;
>  
>       do {
> +             int again = 0;
>               vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
>  
>               if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
>                       down_read_trylock(&root->fs_info->sb->s_umount) &&
>                   mutex_trylock(&root->fs_info->cleaner_mutex)) {
>                       btrfs_run_delayed_iputs(root);
> -                     btrfs_clean_old_snapshots(root);
> +                     again = btrfs_clean_one_old_snapshot(root);
>                       mutex_unlock(&root->fs_info->cleaner_mutex);
>                       btrfs_run_defrag_inodes(root->fs_info);
>                       up_read(&root->fs_info->sb->s_umount);
> @@ -1631,7 +1632,7 @@ static int cleaner_kthread(void *arg)
>  
>               if (freezing(current)) {
>                       refrigerator();
> -             } else {
> +             } else if (!again) {//FIXME: check again now directly from 
> dead_roots?
>                       set_current_state(TASK_INTERRUPTIBLE);
>                       if (!kthread_should_stop())
>                               schedule();
> @@ -3064,7 +3066,13 @@ int btrfs_commit_super(struct btrfs_root *root)
>  
>       mutex_lock(&root->fs_info->cleaner_mutex);
>       btrfs_run_delayed_iputs(root);
> -     btrfs_clean_old_snapshots(root);
> +     if (!btrfs_fs_closing(root->fs_info)) {
> +             /* btrfs_clean_old_snapshots(root); */
> +             wake_up_process(root->fs_info->cleaner_kthread);
> +             printk(KERN_DEBUG "btrfs: wake cleaner from commit_super\n");
> +     } else {
> +             printk(KERN_DEBUG "btrfs: skip cleaning when going down\n");
> +     }
>       mutex_unlock(&root->fs_info->cleaner_mutex);
>  
>       /* wait until ongoing cleanup work done */
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index ec1e0c6..3aba911 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -6995,6 +6995,11 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
>       wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);
>  
>       while (1) {
> +             if (btrfs_fs_closing(root->fs_info)) {
> +                     printk(KERN_DEBUG "btrfs: drop early exit\n");
> +                     err = -EAGAIN;
> +                     goto out_end_trans;
> +             }
>               ret = walk_down_tree(trans, root, path, wc);
>               if (ret < 0) {
>                       err = ret;
> diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
> index 922e6ec..c9dc857 100644
> --- a/fs/btrfs/relocation.c
> +++ b/fs/btrfs/relocation.c
> @@ -4060,6 +4060,7 @@ int btrfs_relocate_block_group(struct btrfs_root 
> *extent_root, u64 group_start)
>       while (1) {
>               mutex_lock(&fs_info->cleaner_mutex);
>  
> +             // FIXME: wake cleaner
>               btrfs_clean_old_snapshots(fs_info->tree_root);
>               ret = relocate_block_group(rc);
>  
> diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
> index de2942f..3d83f6b 100644
> --- a/fs/btrfs/transaction.c
> +++ b/fs/btrfs/transaction.c
> @@ -783,7 +783,7 @@ static noinline int commit_cowonly_roots(struct 
> btrfs_trans_handle *trans,
>  int btrfs_add_dead_root(struct btrfs_root *root)
>  {
>       spin_lock(&root->fs_info->trans_lock);
> -     list_add(&root->root_list, &root->fs_info->dead_roots);
> +     list_add_tail(&root->root_list, &root->fs_info->dead_roots);
>       spin_unlock(&root->fs_info->trans_lock);
>       return 0;
>  }
> @@ -1533,7 +1533,45 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root)
>                       ret = btrfs_drop_snapshot(root, NULL, 0, 0);
>               else
>                       ret =btrfs_drop_snapshot(root, NULL, 1, 0);
> -             BUG_ON(ret < 0);
> +             BUG_ON(ret < 0 && ret != -EAGAIN);
>       }
>       return 0;
>  }
> +/*
> + * return < 0 if error
> + * 0 if there are no more dead_roots at the time of call
> + * 1 there are more to be processed, call me again
> + *
> + * (racy)
> + */
> +int btrfs_clean_one_old_snapshot(struct btrfs_root *root)
> +{
> +     int ret;
> +     int run_again = 1;
> +     struct btrfs_fs_info *fs_info = root->fs_info;
> +
> +     if (root->fs_info->sb->s_flags & MS_RDONLY) {
> +             printk(KERN_WARNING "btrfs: cleaner called for RO fs!\n");
> +     }
> +
> +     spin_lock(&fs_info->trans_lock);
> +     root = list_first_entry(&fs_info->dead_roots,
> +                     struct btrfs_root, root_list);
> +     list_del(&root->root_list);
> +     if (list_empty(&fs_info->dead_roots))
> +             run_again = 0;
> +     spin_unlock(&fs_info->trans_lock);
> +
> +     printk(KERN_DEBUG "btrfs: cleaner removing %llu\n",
> +                     (unsigned long long)root->objectid);
> +
> +     btrfs_kill_all_delayed_nodes(root);
> +
> +     if (btrfs_header_backref_rev(root->node) <
> +                     BTRFS_MIXED_BACKREF_REV)
> +             ret = btrfs_drop_snapshot(root, NULL, 0, 0);
> +     else
> +             ret = btrfs_drop_snapshot(root, NULL, 1, 0);
> +     BUG_ON(ret < 0 && ret != -EAGAIN);
> +     return run_again || ret == -EAGAIN;
> +}
> diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
> index fe27379..7071ca5 100644
> --- a/fs/btrfs/transaction.h
> +++ b/fs/btrfs/transaction.h
> @@ -94,6 +94,7 @@ int btrfs_write_and_wait_transaction(struct 
> btrfs_trans_handle *trans,
>  int btrfs_add_dead_root(struct btrfs_root *root);
>  int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
>  int btrfs_clean_old_snapshots(struct btrfs_root *root);
> +int btrfs_clean_one_old_snapshot(struct btrfs_root *root);
>  int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
>                            struct btrfs_root *root);
>  int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
> ---
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to