These fields may both change while the image is mapped if a snapshot
is created or deleted or the image is resized.  They are guarded by
rbd_dev->header_rwsem, so hold that while reading them, and store a
local copy to refer to outside of the critical section. The local copy
will stay consistent since the snapshot context is reference counted,
and the mapping size is just a u64. This prevents torn loads from
giving us inconsistent values.

Move reading header.snapc into the caller of rbd_img_request_create()
so that we only need to take the semaphore once. The read-only caller,
rbd_parent_request_create() can just pass NULL for snapc, since the
snapshot context is only relevant for writes.

Signed-off-by: Josh Durgin <[email protected]>
---
 drivers/block/rbd.c |   31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
index 4c95b50..b6676b6 100644
--- a/drivers/block/rbd.c
+++ b/drivers/block/rbd.c
@@ -1961,7 +1961,8 @@ static bool rbd_dev_parent_get(struct rbd_device *rbd_dev)
 static struct rbd_img_request *rbd_img_request_create(
                                        struct rbd_device *rbd_dev,
                                        u64 offset, u64 length,
-                                       bool write_request)
+                                       bool write_request,
+                                       struct ceph_snap_context *snapc)
 {
        struct rbd_img_request *img_request;
 
@@ -1969,12 +1970,6 @@ static struct rbd_img_request *rbd_img_request_create(
        if (!img_request)
                return NULL;
 
-       if (write_request) {
-               down_read(&rbd_dev->header_rwsem);
-               ceph_get_snap_context(rbd_dev->header.snapc);
-               up_read(&rbd_dev->header_rwsem);
-       }
-
        img_request->rq = NULL;
        img_request->rbd_dev = rbd_dev;
        img_request->offset = offset;
@@ -1982,7 +1977,7 @@ static struct rbd_img_request *rbd_img_request_create(
        img_request->flags = 0;
        if (write_request) {
                img_request_write_set(img_request);
-               img_request->snapc = rbd_dev->header.snapc;
+               img_request->snapc = snapc;
        } else {
                img_request->snap_id = rbd_dev->spec->snap_id;
        }
@@ -2038,8 +2033,8 @@ static struct rbd_img_request *rbd_parent_request_create(
        rbd_assert(obj_request->img_request);
        rbd_dev = obj_request->img_request->rbd_dev;
 
-       parent_request = rbd_img_request_create(rbd_dev->parent,
-                                               img_offset, length, false);
+       parent_request = rbd_img_request_create(rbd_dev->parent, img_offset,
+                                               length, false, NULL);
        if (!parent_request)
                return NULL;
 
@@ -3065,8 +3060,10 @@ static void rbd_request_fn(struct request_queue *q)
        while ((rq = blk_fetch_request(q))) {
                bool write_request = rq_data_dir(rq) == WRITE;
                struct rbd_img_request *img_request;
+               struct ceph_snap_context *snapc = NULL;
                u64 offset;
                u64 length;
+               u64 mapping_size;
 
                /* Ignore any non-FS requests that filter through. */
 
@@ -3120,8 +3117,16 @@ static void rbd_request_fn(struct request_queue *q)
                        goto end_request;       /* Shouldn't happen */
                }
 
+               down_read(&rbd_dev->header_rwsem);
+               mapping_size = rbd_dev->mapping.size;
+               if (op_type != OBJ_OP_READ) {
+                       snapc = rbd_dev->header.snapc;
+                       ceph_get_snap_context(snapc);
+               }
+               up_read(&rbd_dev->header_rwsem);
+
                result = -EIO;
-               if (offset + length > rbd_dev->mapping.size) {
+               if (offset + length > mapping_size) {
                        rbd_warn(rbd_dev, "beyond EOD (%llu~%llu > %llu)\n",
                                offset, length, rbd_dev->mapping.size);
                        goto end_request;
@@ -3129,7 +3134,7 @@ static void rbd_request_fn(struct request_queue *q)
 
                result = -ENOMEM;
                img_request = rbd_img_request_create(rbd_dev, offset, length,
-                                                       write_request);
+                                                       write_request, snapc);
                if (!img_request)
                        goto end_request;
 
@@ -3147,6 +3152,8 @@ end_request:
                        rbd_warn(rbd_dev, "%s %llx at %llx result %d\n",
                                write_request ? "write" : "read",
                                length, offset, result);
+                       if (snapc)
+                               ceph_put_snap_context(snapc);
 
                        __blk_end_request_all(rq, result);
                }
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to