Re: [PATCH] Btrfs: unlock extent when the dio completes for reads

2012-08-01 Thread Josef Bacik
On Tue, Jul 31, 2012 at 03:30:49PM -0600, Zach Brown wrote:
  +static void btrfs_direct_read_iodone(struct kiocb *iocb, loff_t offset,
  +ssize_t bytes, void *private, int ret,
  +bool is_async)
  +{
  +   struct inode *inode = iocb-ki_filp-f_mapping-host;
  +   u64 lockend;
  +
  +   if (get_state_private(BTRFS_I(inode)-io_tree, offset, lockend)) {
  +   WARN_ON(1);
  +   goto out;
  +   }
  +
  +   unlock_extent(BTRFS_I(inode)-io_tree, offset, lockend);
  +out:
  +   if (is_async)
  +   aio_complete(iocb, ret, 0);
  +   inode_dio_done(inode);
  +}
 
 Hmm.  I'm worried that this can be called from interrupt context and the
 state/extent calls there seem to use bare spin locks.  Did you run this
 with lockdep?  Am I confused?
 

Our read completions are run in a helper thread since we need to do things like
clear the extent state stuff and such so it's safe in this regard.  Course I hit
a lockup with xfstests 91 because there are some cases where we can return
errors without doing the dio complete (rightly so since we haven't setup the dio
yet), so now I have to figure out how to know when we've been unlocked via dio
complete or that I've gotten an error back and already unlocked it...

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: unlock extent when the dio completes for reads V2

2012-08-01 Thread Josef Bacik
A deadlock in xfstests 113 was uncovered by commit

d187663ef24cd3d033f0cbf2867e70b36a3a90b8

This is because we would not return EIOCBQUEUED for short AIO reads, instead
we'd wait for the DIO to complete and then return the amount of data we
transferred, which would allow our stuff to unlock the remaning amount.  But
with this change this no longer happens, so if we have a short AIO read (for
example if we try to read past EOF), we could leave the section from EOF to
the end of where we tried to read locked.  To fix this we need to store the
end of the region we've locked in our locked state bit and then only unlock
when the dio has finished completely.  This makes sure we always unlock the
entire range we locked when reading.  Before this patch we would hang when
running 113, no this no longer happens.  Thanks to Zach Brown for the idea,

Signed-off-by: Josef Bacik jba...@fusionio.com
---
V1-V2: fix a problem where we didn't unlock the range if io was never
submitted.
 fs/btrfs/inode.c |   38 --
 1 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 48bdfd2..4b82ae2 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5810,9 +5810,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, 
sector_t iblock,
if (!create  (em-block_start == EXTENT_MAP_HOLE ||
test_bit(EXTENT_FLAG_PREALLOC, em-flags))) {
free_extent_map(em);
-   /* DIO will do one hole at a time, so just unlock a sector */
-   unlock_extent(BTRFS_I(inode)-io_tree, start,
- start + root-sectorsize - 1);
return 0;
}
 
@@ -5961,8 +5958,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int 
err)
bvec++;
} while (bvec = bvec_end);
 
-   unlock_extent(BTRFS_I(inode)-io_tree, dip-logical_offset,
- dip-logical_offset + dip-bytes - 1);
bio-bi_private = dip-private;
 
kfree(dip-csums);
@@ -6340,6 +6335,26 @@ static ssize_t check_direct_IO(struct btrfs_root *root, 
int rw, struct kiocb *io
 out:
return retval;
 }
+
+static void btrfs_direct_read_iodone(struct kiocb *iocb, loff_t offset,
+ssize_t bytes, void *private, int ret,
+bool is_async)
+{
+   struct inode *inode = iocb-ki_filp-f_mapping-host;
+   u64 lockend;
+
+   if (get_state_private(BTRFS_I(inode)-io_tree, offset, lockend)) {
+   WARN_ON(1);
+   goto out;
+   }
+
+   unlock_extent(BTRFS_I(inode)-io_tree, offset, lockend);
+out:
+   if (is_async)
+   aio_complete(iocb, ret, 0);
+   inode_dio_done(inode);
+}
+
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
@@ -6353,6 +6368,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
int writing = rw  WRITE;
int write_bits = 0;
size_t count = iov_length(iov, nr_segs);
+   dio_iodone_t *iodone = NULL;
 
if (check_direct_IO(BTRFS_I(inode)-root, rw, iocb, iov,
offset, nr_segs)) {
@@ -6438,6 +6454,9 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 1, 0, cached_state, GFP_NOFS);
goto out;
}
+   } else {
+   set_state_private(BTRFS_I(inode)-io_tree, lockstart, lockend);
+   iodone = btrfs_direct_read_iodone;
}
 
free_extent_state(cached_state);
@@ -6445,9 +6464,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb 
*iocb,
 
ret = __blockdev_direct_IO(rw, iocb, inode,
   BTRFS_I(inode)-root-fs_info-fs_devices-latest_bdev,
-  iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+  iov, offset, nr_segs, btrfs_get_blocks_direct, iodone,
   btrfs_submit_direct, 0);
 
+   /*
+* If we're reading we will be unlocked by the iodone call, unless ret
+* is 0 then we need to unlock since we didn't submit any io.
+*/
+   if (!writing  ret)
+   goto out;
+
if (ret  0  ret != -EIOCBQUEUED) {
clear_extent_bit(BTRFS_I(inode)-io_tree, offset,
  offset + iov_length(iov, nr_segs) - 1,
-- 
1.7.7.6

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: unlock extent when the dio completes for reads

2012-07-31 Thread Josef Bacik
A deadlock in xfstests 113 was uncovered by commit

d187663ef24cd3d033f0cbf2867e70b36a3a90b8

This is because we would not return EIOCBQUEUED for short AIO reads, instead
we'd wait for the DIO to complete and then return the amount of data we
transferred, which would allow our stuff to unlock the remaning amount.  But
with this change this no longer happens, so if we have a short AIO read (for
example if we try to read past EOF), we could leave the section from EOF to
the end of where we tried to read locked.  To fix this we need to store the
end of the region we've locked in our locked state bit and then only unlock
when the dio has finished completely.  This makes sure we always unlock the
entire range we locked when reading.  Before this patch we would hang when
running 113, no this no longer happens.  Thanks to Zach Brown for the idea,

Signed-off-by: Josef Bacik jba...@fusionio.com
---
 fs/btrfs/inode.c |   35 +--
 1 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 48bdfd2..c2f9300 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5810,9 +5810,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, 
sector_t iblock,
if (!create  (em-block_start == EXTENT_MAP_HOLE ||
test_bit(EXTENT_FLAG_PREALLOC, em-flags))) {
free_extent_map(em);
-   /* DIO will do one hole at a time, so just unlock a sector */
-   unlock_extent(BTRFS_I(inode)-io_tree, start,
- start + root-sectorsize - 1);
return 0;
}
 
@@ -5961,8 +5958,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int 
err)
bvec++;
} while (bvec = bvec_end);
 
-   unlock_extent(BTRFS_I(inode)-io_tree, dip-logical_offset,
- dip-logical_offset + dip-bytes - 1);
bio-bi_private = dip-private;
 
kfree(dip-csums);
@@ -6340,6 +6335,26 @@ static ssize_t check_direct_IO(struct btrfs_root *root, 
int rw, struct kiocb *io
 out:
return retval;
 }
+
+static void btrfs_direct_read_iodone(struct kiocb *iocb, loff_t offset,
+ssize_t bytes, void *private, int ret,
+bool is_async)
+{
+   struct inode *inode = iocb-ki_filp-f_mapping-host;
+   u64 lockend;
+
+   if (get_state_private(BTRFS_I(inode)-io_tree, offset, lockend)) {
+   WARN_ON(1);
+   goto out;
+   }
+
+   unlock_extent(BTRFS_I(inode)-io_tree, offset, lockend);
+out:
+   if (is_async)
+   aio_complete(iocb, ret, 0);
+   inode_dio_done(inode);
+}
+
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
const struct iovec *iov, loff_t offset,
unsigned long nr_segs)
@@ -6353,6 +6368,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
int writing = rw  WRITE;
int write_bits = 0;
size_t count = iov_length(iov, nr_segs);
+   dio_iodone_t *iodone = NULL;
 
if (check_direct_IO(BTRFS_I(inode)-root, rw, iocb, iov,
offset, nr_segs)) {
@@ -6438,6 +6454,9 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 1, 0, cached_state, GFP_NOFS);
goto out;
}
+   } else {
+   set_state_private(BTRFS_I(inode)-io_tree, lockstart, lockend);
+   iodone = btrfs_direct_read_iodone;
}
 
free_extent_state(cached_state);
@@ -6445,9 +6464,13 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb 
*iocb,
 
ret = __blockdev_direct_IO(rw, iocb, inode,
   BTRFS_I(inode)-root-fs_info-fs_devices-latest_bdev,
-  iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
+  iov, offset, nr_segs, btrfs_get_blocks_direct, iodone,
   btrfs_submit_direct, 0);
 
+   /* If we're reading we will be unlocked by the iodone call */
+   if (!writing)
+   goto out;
+
if (ret  0  ret != -EIOCBQUEUED) {
clear_extent_bit(BTRFS_I(inode)-io_tree, offset,
  offset + iov_length(iov, nr_segs) - 1,
-- 
1.7.7.6

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: unlock extent when the dio completes for reads

2012-07-31 Thread Zach Brown
 +static void btrfs_direct_read_iodone(struct kiocb *iocb, loff_t offset,
 +  ssize_t bytes, void *private, int ret,
 +  bool is_async)
 +{
 + struct inode *inode = iocb-ki_filp-f_mapping-host;
 + u64 lockend;
 +
 + if (get_state_private(BTRFS_I(inode)-io_tree, offset, lockend)) {
 + WARN_ON(1);
 + goto out;
 + }
 +
 + unlock_extent(BTRFS_I(inode)-io_tree, offset, lockend);
 +out:
 + if (is_async)
 + aio_complete(iocb, ret, 0);
 + inode_dio_done(inode);
 +}

Hmm.  I'm worried that this can be called from interrupt context and the
state/extent calls there seem to use bare spin locks.  Did you run this
with lockdep?  Am I confused?

- z
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html