Maxim Patlasov <[email protected]> writes: > Rebase Dima's patch on top of rh7-3.10.0-327.18.2.vz7.14.19, > but without help of delayed_flush engine: > > To ensure consistency on crash/power outage/hard reboot > events, ploop must implement the following barrier logic > for RELOC_A|S requests: > > 1) After we store data to new place, but before updating > BAT on disk, we have FLUSH everything (in fact, flushing > those data would be enough, but it is simplier to flush > everything). > > 2) We should not proceed handling RELOC_A|S until we > 100% sure new BAT value went to disk platters. So far as > new BAT is only one page, it's OK to mark corresponding > bio with FUA flag for io_direct case. For io_kaio, not > having FUA api, we have to post_fsync BAT update. > > PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA introduced > long time ago probably were intended to ensure the > logic above, but they actually didn't. > > The patch removes PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA, > and implements barriers in a straightforward and simple way: > check for RELOC_A|S explicitly and make FLUSH/FUA where > needed. > > Signed-off-by: Maxim Patlasov <[email protected]> > --- > drivers/block/ploop/dev.c | 4 ++-- > drivers/block/ploop/io_direct.c | 7 ------- > drivers/block/ploop/io_kaio.c | 8 +++++--- > drivers/block/ploop/map.c | 22 ++++++++++------------ > include/linux/ploop/ploop.h | 1 - > 5 files changed, 17 insertions(+), 25 deletions(-) > > diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c > index 2b60dfa..40768b6 100644 > --- a/drivers/block/ploop/dev.c > +++ b/drivers/block/ploop/dev.c > @@ -2610,8 +2610,8 @@ restart: > top_delta = ploop_top_delta(plo); > sbl.head = sbl.tail = preq->aux_bio; > > - /* Relocated data write required sync before BAT updatee */ > - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); > + /* Relocated data write required sync before BAT update > + * this will happen inside index_update */ > > if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) { > preq->eng_state = PLOOP_E_DATA_WBI; > diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c > index c4d0f63..266f041 100644 > --- a/drivers/block/ploop/io_direct.c > +++ b/drivers/block/ploop/io_direct.c > @@ -89,15 +89,11 @@ dio_submit(struct ploop_io *io, struct ploop_request * > preq, > sector_t sec, nsec; > int err; > struct bio_list_walk bw; > - int postfua = 0; > int write = !!(rw & REQ_WRITE); > int delayed_fua = 0; > > trace_submit(preq); > > - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state)) > - postfua = 1; > - > if ((rw & REQ_FUA) && ploop_req_delay_fua_possible(preq)) { > /* Mark req that delayed flush required */ > preq->req_rw |= (REQ_FLUSH | REQ_FUA); > @@ -233,9 +229,6 @@ flush_bio: > b->bi_private = preq; > b->bi_end_io = dio_endio_async; > > - if (unlikely(postfua && !bl.head)) > - rw2 |= REQ_FUA; > - > ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw); > submit_bio(rw2, b); > } > diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c > index ed550f4..85863df 100644 > --- a/drivers/block/ploop/io_kaio.c > +++ b/drivers/block/ploop/io_kaio.c > @@ -69,6 +69,8 @@ static void kaio_complete_io_state(struct ploop_request * > preq) > unsigned long flags; > int post_fsync = 0; > int need_fua = !!(preq->req_rw & REQ_FUA); > + unsigned long state = READ_ONCE(preq->state); > + int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)); > > if (preq->error || !(preq->req_rw & REQ_FUA) || > preq->eng_state == PLOOP_E_INDEX_READ || > @@ -80,9 +82,9 @@ static void kaio_complete_io_state(struct ploop_request * > preq) > } > > /* Convert requested fua to fsync */ > - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) || > - test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) || > - (need_fua && !ploop_req_delay_fua_possible(preq))) { This is the change I dislike the most. io_XXX should not care it is reloc or not. Caller should rule whenether PREFLUSH/POSTFLUSH should happen before preq completes. So IMHO this is a crunch, but correct one.
> + if (test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) ||
> + (need_fua && !ploop_req_delay_fua_possible(preq)) ||
> + (reloc && ploop_req_delay_fua_possible(preq))) {
> post_fsync = 1;
> preq->req_rw &= ~REQ_FUA;
> }
> diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c
> index 915a216..1883674 100644
> --- a/drivers/block/ploop/map.c
> +++ b/drivers/block/ploop/map.c
> @@ -909,6 +909,7 @@ void ploop_index_update(struct ploop_request * preq)
> struct page * page;
> sector_t sec;
> unsigned long rw;
> + unsigned long state = READ_ONCE(preq->state);
>
> /* No way back, we are going to initiate index write. */
>
> @@ -961,10 +962,6 @@ void ploop_index_update(struct ploop_request * preq)
> __TRACE("wbi %p %u %p\n", preq, preq->req_cluster, m);
> plo->st.map_single_writes++;
> top_delta->ops->map_index(top_delta, m->mn_start, &sec);
> - /* Relocate requires consistent writes, mark such reqs appropriately */
> - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> - test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state);
>
> rw = (preq->req_rw & (REQ_FUA | REQ_FLUSH));
>
> @@ -972,6 +969,10 @@ void ploop_index_update(struct ploop_request * preq)
> will do the FLUSH */
> preq->req_rw &= ~REQ_FLUSH;
>
> + /* Relocate requires consistent index update */
> + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> + rw |= (REQ_FLUSH | REQ_FUA);
> +
> top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw);
>
> put_page(page);
> @@ -1094,7 +1095,6 @@ static void map_wb_complete(struct map_node * m, int
> err)
> int delayed = 0;
> unsigned int idx;
> sector_t sec;
> - int force_fua;
> unsigned long rw;
>
> /* First, complete processing of written back indices,
> @@ -1166,10 +1166,10 @@ static void map_wb_complete(struct map_node * m, int
> err)
>
> main_preq = NULL;
> rw = 0;
> - force_fua = 0;
>
> list_for_each_safe(cursor, tmp, &m->io_queue) {
> struct ploop_request * preq;
> + unsigned long state;
>
> preq = list_entry(cursor, struct ploop_request, list);
>
> @@ -1191,9 +1191,10 @@ static void map_wb_complete(struct map_node * m, int
> err)
> will do the FLUSH */
> preq->req_rw &= ~REQ_FLUSH;
>
> - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) ||
> - test_bit(PLOOP_REQ_RELOC_S, &preq->state))
> - force_fua = 1;
> + state = READ_ONCE(preq->state);
> + /* Relocate requires consistent index update */
> + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL))
> + rw |= (REQ_FLUSH | REQ_FUA);
>
> preq->eng_state = PLOOP_E_INDEX_WB;
> get_page(page);
> @@ -1220,9 +1221,6 @@ static void map_wb_complete(struct map_node * m, int
> err)
> plo->st.map_multi_writes++;
> top_delta->ops->map_index(top_delta, m->mn_start, &sec);
>
> - if (force_fua)
> - set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state);
> -
> top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec,
> rw);
> put_page(page);
> diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h
> index 920daf7..6f41570 100644
> --- a/include/linux/ploop/ploop.h
> +++ b/include/linux/ploop/ploop.h
> @@ -474,7 +474,6 @@ enum
> PLOOP_REQ_ZERO,
> PLOOP_REQ_DISCARD,
> PLOOP_REQ_RSYNC,
> - PLOOP_REQ_FORCE_FUA, /*force fua of req write I/O by engine */
> PLOOP_REQ_KAIO_FSYNC, /*force image fsync by KAIO module */
> PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */
> PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */
signature.asc
Description: PGP signature
_______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
