Re: [PATCH v2 11/12] Btrfs: implement repair function when direct read fails

2014-09-03 Thread Miao Xie
On Tue, 2 Sep 2014 09:05:15 -0400, Chris Mason wrote:
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 08e65e9..56b1546 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -698,7 +719,12 @@ static void end_workqueue_bio(struct bio *bio, int 
> err)
>  
>   fs_info = end_io_wq->info;
>   end_io_wq->error = err;
> - btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
> +
> + if (likely(end_io_wq->metadata != BTRFS_WQ_ENDIO_DIO_REPAIR))
> + btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL,
> + NULL);
> + else
> + INIT_WORK(&end_io_wq->work.normal_work, dio_end_workqueue_fn);

 It's not clear why this one is using INIT_WORK instead of
 btrfs_init_work, or why we're calling directly into queue_work instead
 of btrfs_queue_work.  What am I missing?
>>>
>>> I'm sorry that I forgot writing the explanation in this patch's changlog,
>>> I wrote it in Patch 0.
>>>
>>> "2.When the io on the mirror ends, we will insert the endio work into the
>>>system workqueue, not btrfs own endio workqueue, because the original
>>>endio work is still blocked in the btrfs endio workqueue, if we insert
>>>the endio work of the io on the mirror into that workqueue, deadlock
>>>would happen."
>>
>> Can you elaborate the deadlock?
>>
>> Now that buffer read can insert a subsequent read-mirror bio into btrfs endio
>> workqueue without problems, what's the difference?
> 
> We do have problems if we're inserting dependent items in the same
> workqueue.
> 
> Miao, please make a repair workqueue.  I'll also have a use for it in
> the raid56 parity work I think.

OK, I'll update the patch soon.

Thanks
Miao

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 11/12] Btrfs: implement repair function when direct read fails

2014-09-02 Thread Chris Mason


On 09/02/2014 08:33 AM, Liu Bo wrote:
> On Mon, Sep 01, 2014 at 02:56:15PM +0800, Miao Xie wrote:
>> On Fri, 29 Aug 2014 14:31:48 -0400, Chris Mason wrote:
>>> On 07/29/2014 05:24 AM, Miao Xie wrote:
 This patch implement data repair function when direct read fails.

 The detail of the implementation is:
 - When we find the data is not right, we try to read the data from the 
 other
   mirror.
 - After we get right data, we write it back to the corrupted mirror.
 - And if the data on the new mirror is still corrupted, we will try next
   mirror until we read right data or all the mirrors are traversed.
 - After the above work, we set the uptodate flag according to the result.

 diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
 index 08e65e9..56b1546 100644
 --- a/fs/btrfs/disk-io.c
 +++ b/fs/btrfs/disk-io.c
 @@ -698,7 +719,12 @@ static void end_workqueue_bio(struct bio *bio, int 
 err)
  
fs_info = end_io_wq->info;
end_io_wq->error = err;
 -  btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
 +
 +  if (likely(end_io_wq->metadata != BTRFS_WQ_ENDIO_DIO_REPAIR))
 +  btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL,
 +  NULL);
 +  else
 +  INIT_WORK(&end_io_wq->work.normal_work, dio_end_workqueue_fn);
>>>
>>> It's not clear why this one is using INIT_WORK instead of
>>> btrfs_init_work, or why we're calling directly into queue_work instead
>>> of btrfs_queue_work.  What am I missing?
>>
>> I'm sorry that I forgot writing the explanation in this patch's changlog,
>> I wrote it in Patch 0.
>>
>> "2.When the io on the mirror ends, we will insert the endio work into the
>>system workqueue, not btrfs own endio workqueue, because the original
>>endio work is still blocked in the btrfs endio workqueue, if we insert
>>the endio work of the io on the mirror into that workqueue, deadlock
>>would happen."
> 
> Can you elaborate the deadlock?
> 
> Now that buffer read can insert a subsequent read-mirror bio into btrfs endio
> workqueue without problems, what's the difference?

We do have problems if we're inserting dependent items in the same
workqueue.

Miao, please make a repair workqueue.  I'll also have a use for it in
the raid56 parity work I think.

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 11/12] Btrfs: implement repair function when direct read fails

2014-09-02 Thread Liu Bo
On Mon, Sep 01, 2014 at 02:56:15PM +0800, Miao Xie wrote:
> On Fri, 29 Aug 2014 14:31:48 -0400, Chris Mason wrote:
> > On 07/29/2014 05:24 AM, Miao Xie wrote:
> >> This patch implement data repair function when direct read fails.
> >>
> >> The detail of the implementation is:
> >> - When we find the data is not right, we try to read the data from the 
> >> other
> >>   mirror.
> >> - After we get right data, we write it back to the corrupted mirror.
> >> - And if the data on the new mirror is still corrupted, we will try next
> >>   mirror until we read right data or all the mirrors are traversed.
> >> - After the above work, we set the uptodate flag according to the result.
> >>
> >> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> >> index 08e65e9..56b1546 100644
> >> --- a/fs/btrfs/disk-io.c
> >> +++ b/fs/btrfs/disk-io.c
> >> @@ -698,7 +719,12 @@ static void end_workqueue_bio(struct bio *bio, int 
> >> err)
> >>  
> >>fs_info = end_io_wq->info;
> >>end_io_wq->error = err;
> >> -  btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
> >> +
> >> +  if (likely(end_io_wq->metadata != BTRFS_WQ_ENDIO_DIO_REPAIR))
> >> +  btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL,
> >> +  NULL);
> >> +  else
> >> +  INIT_WORK(&end_io_wq->work.normal_work, dio_end_workqueue_fn);
> > 
> > It's not clear why this one is using INIT_WORK instead of
> > btrfs_init_work, or why we're calling directly into queue_work instead
> > of btrfs_queue_work.  What am I missing?
> 
> I'm sorry that I forgot writing the explanation in this patch's changlog,
> I wrote it in Patch 0.
> 
> "2.When the io on the mirror ends, we will insert the endio work into the
>system workqueue, not btrfs own endio workqueue, because the original
>endio work is still blocked in the btrfs endio workqueue, if we insert
>the endio work of the io on the mirror into that workqueue, deadlock
>would happen."

Can you elaborate the deadlock?

Now that buffer read can insert a subsequent read-mirror bio into btrfs endio
workqueue without problems, what's the difference?

thanks,
-liubo

> 
> Could you add it into the changelog of this patch when you apply this patch?
> 
> Thanks
> Miao
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 11/12] Btrfs: implement repair function when direct read fails

2014-08-31 Thread Miao Xie
On Fri, 29 Aug 2014 14:31:48 -0400, Chris Mason wrote:
> On 07/29/2014 05:24 AM, Miao Xie wrote:
>> This patch implement data repair function when direct read fails.
>>
>> The detail of the implementation is:
>> - When we find the data is not right, we try to read the data from the other
>>   mirror.
>> - After we get right data, we write it back to the corrupted mirror.
>> - And if the data on the new mirror is still corrupted, we will try next
>>   mirror until we read right data or all the mirrors are traversed.
>> - After the above work, we set the uptodate flag according to the result.
>>
>> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
>> index 08e65e9..56b1546 100644
>> --- a/fs/btrfs/disk-io.c
>> +++ b/fs/btrfs/disk-io.c
>> @@ -698,7 +719,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
>>  
>>  fs_info = end_io_wq->info;
>>  end_io_wq->error = err;
>> -btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
>> +
>> +if (likely(end_io_wq->metadata != BTRFS_WQ_ENDIO_DIO_REPAIR))
>> +btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL,
>> +NULL);
>> +else
>> +INIT_WORK(&end_io_wq->work.normal_work, dio_end_workqueue_fn);
> 
> It's not clear why this one is using INIT_WORK instead of
> btrfs_init_work, or why we're calling directly into queue_work instead
> of btrfs_queue_work.  What am I missing?

I'm sorry that I forgot writing the explanation in this patch's changlog,
I wrote it in Patch 0.

"2.When the io on the mirror ends, we will insert the endio work into the
   system workqueue, not btrfs own endio workqueue, because the original
   endio work is still blocked in the btrfs endio workqueue, if we insert
   the endio work of the io on the mirror into that workqueue, deadlock
   would happen."

Could you add it into the changelog of this patch when you apply this patch?

Thanks
Miao
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 11/12] Btrfs: implement repair function when direct read fails

2014-08-29 Thread Chris Mason
On 07/29/2014 05:24 AM, Miao Xie wrote:
> This patch implement data repair function when direct read fails.
> 
> The detail of the implementation is:
> - When we find the data is not right, we try to read the data from the other
>   mirror.
> - After we get right data, we write it back to the corrupted mirror.
> - And if the data on the new mirror is still corrupted, we will try next
>   mirror until we read right data or all the mirrors are traversed.
> - After the above work, we set the uptodate flag according to the result.
> 
> diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
> index 08e65e9..56b1546 100644
> --- a/fs/btrfs/disk-io.c
> +++ b/fs/btrfs/disk-io.c
> @@ -698,7 +719,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
>  
>   fs_info = end_io_wq->info;
>   end_io_wq->error = err;
> - btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
> +
> + if (likely(end_io_wq->metadata != BTRFS_WQ_ENDIO_DIO_REPAIR))
> + btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL,
> + NULL);
> + else
> + INIT_WORK(&end_io_wq->work.normal_work, dio_end_workqueue_fn);

It's not clear why this one is using INIT_WORK instead of
btrfs_init_work, or why we're calling directly into queue_work instead
of btrfs_queue_work.  What am I missing?

-chris
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 11/12] Btrfs: implement repair function when direct read fails

2014-07-29 Thread Miao Xie
This patch implement data repair function when direct read fails.

The detail of the implementation is:
- When we find the data is not right, we try to read the data from the other
  mirror.
- After we get right data, we write it back to the corrupted mirror.
- And if the data on the new mirror is still corrupted, we will try next
  mirror until we read right data or all the mirrors are traversed.
- After the above work, we set the uptodate flag according to the result.

Signed-off-by: Miao Xie 
---
Changelog v1-v2:
- None
---
 fs/btrfs/btrfs_inode.h |   2 +-
 fs/btrfs/disk-io.c |  43 ++--
 fs/btrfs/disk-io.h |   1 +
 fs/btrfs/extent_io.c   |  12 ++-
 fs/btrfs/extent_io.h   |   5 +-
 fs/btrfs/inode.c   | 276 +
 6 files changed, 300 insertions(+), 39 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 745fca40..20d4975 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -271,7 +271,7 @@ struct btrfs_dio_private {
 * The original bio may be splited to several sub-bios, this is
 * done during endio of sub-bios
 */
-   int (*subio_endio)(struct inode *, struct btrfs_io_bio *);
+   int (*subio_endio)(struct inode *, struct btrfs_io_bio *, int);
 };
 
 /*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 08e65e9..56b1546 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -691,6 +691,27 @@ static int btree_io_failed_hook(struct page *page, int 
failed_mirror)
return -EIO;/* we fixed nothing */
 }
 
+static inline void do_end_workqueue_fn(struct end_io_wq *end_io_wq)
+{
+   struct bio *bio = end_io_wq->bio;
+
+   bio->bi_private = end_io_wq->private;
+   bio->bi_end_io = end_io_wq->end_io;
+   bio_endio_nodec(bio, end_io_wq->error);
+   kfree(end_io_wq);
+}
+
+static void dio_end_workqueue_fn(struct work_struct *work)
+{
+   struct btrfs_work *bwork;
+   struct end_io_wq *end_io_wq;
+
+   bwork = container_of(work, struct btrfs_work, normal_work);
+   end_io_wq = container_of(bwork, struct end_io_wq, work);
+
+   do_end_workqueue_fn(end_io_wq);
+}
+
 static void end_workqueue_bio(struct bio *bio, int err)
 {
struct end_io_wq *end_io_wq = bio->bi_private;
@@ -698,7 +719,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
 
fs_info = end_io_wq->info;
end_io_wq->error = err;
-   btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
+
+   if (likely(end_io_wq->metadata != BTRFS_WQ_ENDIO_DIO_REPAIR))
+   btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL,
+   NULL);
+   else
+   INIT_WORK(&end_io_wq->work.normal_work, dio_end_workqueue_fn);
 
if (bio->bi_rw & REQ_WRITE) {
if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
@@ -714,7 +740,9 @@ static void end_workqueue_bio(struct bio *bio, int err)
btrfs_queue_work(fs_info->endio_write_workers,
 &end_io_wq->work);
} else {
-   if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+   if (unlikely(end_io_wq->metadata == BTRFS_WQ_ENDIO_DIO_REPAIR))
+   queue_work(system_wq, &end_io_wq->work.normal_work);
+   else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
btrfs_queue_work(fs_info->endio_raid56_workers,
 &end_io_wq->work);
else if (end_io_wq->metadata)
@@ -738,6 +766,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct 
bio *bio,
int metadata)
 {
struct end_io_wq *end_io_wq;
+
end_io_wq = kmalloc(sizeof(*end_io_wq), GFP_NOFS);
if (!end_io_wq)
return -ENOMEM;
@@ -1730,18 +1759,10 @@ static int setup_bdi(struct btrfs_fs_info *info, struct 
backing_dev_info *bdi)
  */
 static void end_workqueue_fn(struct btrfs_work *work)
 {
-   struct bio *bio;
struct end_io_wq *end_io_wq;
-   int error;
 
end_io_wq = container_of(work, struct end_io_wq, work);
-   bio = end_io_wq->bio;
-
-   error = end_io_wq->error;
-   bio->bi_private = end_io_wq->private;
-   bio->bi_end_io = end_io_wq->end_io;
-   kfree(end_io_wq);
-   bio_endio_nodec(bio, error);
+   do_end_workqueue_fn(end_io_wq);
 }
 
 static int cleaner_kthread(void *arg)
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index 23ce3ce..4fde7a0 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -30,6 +30,7 @@ enum {
BTRFS_WQ_ENDIO_METADATA = 1,
BTRFS_WQ_ENDIO_FREE_SPACE = 2,
BTRFS_WQ_ENDIO_RAID56 = 3,
+   BTRFS_WQ_ENDIO_DIO_REPAIR = 4,
 };
 
 static inline u64 btrfs_sb_offset(int mirror)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8082220..31600ef 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/exten