On Tue, Aug 28, 2018 at 07:33:25PM -0400, Kent Overstreet wrote:
> I just came across bi_done and your patch that added it -
> f9df1cd99e bio: add bvec_iter rewind API
> 
> I invite you to think through what happens to a bio that gets split by 
> something
> further down the stack, and then rewound after completion:
> 
> To create the first half of the split, you just truncate the iterator by 
> setting
> bio->bi_iter.bi_size to be the length you want. So if the original bio becomes
> the first half of the split, we've lost the original value for bi_size. After
> completion, bio_rewind_iter will rewind the bio to the start position the
> original code expects, but the bio will _not_ have the same size as it did 
> when
> that code submitted it.
> 
> So if bio_rewind_iter() is being used because we want to restore the bio to 
> what
> it was when we submitted it, this is bogus.
> 
> To create the second half of the split, we just advance the bio's iterator up 
> to
> where we want the split to start. So this shouldn't prevent us from restoring
> the bio to the state it was in when it was created by rewinding it.
> 
> Except if you look at bio_split(), it sets bi_done to 0, and git blame reveals
> an interesting commit message...
> 
>     block: reset bi_iter.bi_done after splitting bio
>     
>     After the bio has been updated to represent the remaining sectors, reset
>     bi_done so bio_rewind_iter() does not rewind further than it should.
>     
>     This resolves a bio_integrity_process() failure on reads where the
>     original request was split.
> 
> *sigh*
> 
> The original code was bogus, and so was the fix. If a bio is split _AFTER_ a
> chunk of code in the stack sees it - and wants to restore it to the state it 
> was
> it when it saw it after it completes - then not touching bi_done gives us the
> result we want. But if the bio was split _BEFORE_ that chunk of code sees it,
> rewinding it without ever touching bi_done will rewind it to be _LARGER_ than
> the bio that chunk of code saw.
> 
> So what should bi_done be? There is no correct answer - fundamentally, bi_done
> _cannot_ do what you're trying to do with it. Advancing or truncating an
> iterator destroys information, and stacked layers can manipulate bio iterators
> in arbitrary ways.
> 
> The correct way, the ONLY way, to restore a bio's iterator to a previous state
> after it's been submitted and other code has messed with it, is to save the
> entire iterator state: before you submit the bio, you save a copy of
> bio->bi_iter in your driver's private state, and then you restore it after
> completion.

I agree, and it isn't good to introduce extra update of .bi_done in fast path
too, and it just makes things complicated in concept.

Maybe we can do the following way, and looks it works in scsi_debug/dix
test.

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 67b5fb861a51..839c332069a9 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -306,6 +306,8 @@ bool bio_integrity_prep(struct bio *bio)
        if (bio_data_dir(bio) == WRITE) {
                bio_integrity_process(bio, &bio->bi_iter,
                                      bi->profile->generate_fn);
+       } else {
+               bip->bio_iter = bio->bi_iter;
        }
        return true;
 
@@ -331,19 +333,13 @@ static void bio_integrity_verify_fn(struct work_struct 
*work)
                container_of(work, struct bio_integrity_payload, bip_work);
        struct bio *bio = bip->bip_bio;
        struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
-       struct bvec_iter iter = bio->bi_iter;
 
        /*
         * At the moment verify is called bio's iterator was advanced
         * during split and completion, we need to rewind iterator to
         * it's original position.
         */
-       if (bio_rewind_iter(bio, &iter, iter.bi_done)) {
-               bio->bi_status = bio_integrity_process(bio, &iter,
-                                                      bi->profile->verify_fn);
-       } else {
-               bio->bi_status = BLK_STS_IOERR;
-       }
+       bio->bi_status = bio_integrity_process(bio, &bip->bio_iter, 
bi->profile->verify_fn);
 
        bio_integrity_free(bio);
        bio_endio(bio);
diff --git a/block/bio.c b/block/bio.c
index b12966e415d3..529c1a65b1e8 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -283,6 +283,8 @@ void bio_init(struct bio *bio, struct bio_vec *table,
        atomic_set(&bio->__bi_remaining, 1);
        atomic_set(&bio->__bi_cnt, 1);
 
+       WARN_ON(max_vecs > UIO_MAXIOV);
+
        bio->bi_io_vec = table;
        bio->bi_max_vecs = max_vecs;
 }
@@ -1807,7 +1809,6 @@ struct bio *bio_split(struct bio *bio, int sectors,
                bio_integrity_trim(split);
 
        bio_advance(bio, split->bi_iter.bi_size);
-       bio->bi_iter.bi_done = 0;
 
        if (bio_flagged(bio, BIO_TRACE_COMPLETION))
                bio_set_flag(split, BIO_TRACE_COMPLETION);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 51371740d2a8..14b4fa266357 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -170,27 +170,11 @@ static inline void bio_advance_iter(struct bio *bio, 
struct bvec_iter *iter,
 {
        iter->bi_sector += bytes >> 9;
 
-       if (bio_no_advance_iter(bio)) {
+       if (bio_no_advance_iter(bio))
                iter->bi_size -= bytes;
-               iter->bi_done += bytes;
-       } else {
+       else
                bvec_iter_advance(bio->bi_io_vec, iter, bytes);
                /* TODO: It is reasonable to complete bio with error here. */
-       }
-}
-
-static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter,
-               unsigned int bytes)
-{
-       iter->bi_sector -= bytes >> 9;
-
-       if (bio_no_advance_iter(bio)) {
-               iter->bi_size += bytes;
-               iter->bi_done -= bytes;
-               return true;
-       }
-
-       return bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
 }
 
 #define __bio_for_each_segment(bvl, bio, iter, start)                  \
@@ -353,6 +337,8 @@ struct bio_integrity_payload {
        unsigned short          bip_max_vcnt;   /* integrity bio_vec slots */
        unsigned short          bip_flags;      /* control flags */
 
+       struct bvec_iter        bio_iter;       /* for rewinding parent bio */
+
        struct work_struct      bip_work;       /* I/O completion */
 
        struct bio_vec          *bip_vec;
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index fe7a22dd133b..5ebfd0500fe7 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -38,11 +38,8 @@ struct bvec_iter {
                                                   sectors */
        unsigned int            bi_size;        /* residual I/O count */
 
-       unsigned int            bi_idx;         /* current index into bvl_vec */
-
-       unsigned int            bi_done;        /* number of bytes completed */
-
-       unsigned int            bi_bvec_done;   /* number of bytes completed in
+       unsigned int            bi_idx:10;      /* current index into bvl_vec */
+       unsigned int            bi_bvec_done:22;        /* number of bytes 
completed in
                                                   current bvec */
 };
 
@@ -85,7 +82,6 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
                bytes -= len;
                iter->bi_size -= len;
                iter->bi_bvec_done += len;
-               iter->bi_done += len;
 
                if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
                        iter->bi_bvec_done = 0;
@@ -95,30 +91,6 @@ static inline bool bvec_iter_advance(const struct bio_vec 
*bv,
        return true;
 }
 
-static inline bool bvec_iter_rewind(const struct bio_vec *bv,
-                                    struct bvec_iter *iter,
-                                    unsigned int bytes)
-{
-       while (bytes) {
-               unsigned len = min(bytes, iter->bi_bvec_done);
-
-               if (iter->bi_bvec_done == 0) {
-                       if (WARN_ONCE(iter->bi_idx == 0,
-                                     "Attempted to rewind iter beyond "
-                                     "bvec's boundaries\n")) {
-                               return false;
-                       }
-                       iter->bi_idx--;
-                       iter->bi_bvec_done = __bvec_iter_bvec(bv, 
*iter)->bv_len;
-                       continue;
-               }
-               bytes -= len;
-               iter->bi_size += len;
-               iter->bi_bvec_done -= len;
-       }
-       return true;
-}
-
 #define for_each_bvec(bvl, bio_vec, iter, start)                       \
        for (iter = (start);                                            \
             (iter).bi_size &&                                          \
Thanks,
Ming

Reply via email to