Rebase Dima's patch on top of rh7-3.10.0-327.18.2.vz7.14.19, but without help of delayed_flush engine:
To ensure consistency on crash/power outage/hard reboot events, ploop must implement the following barrier logic for RELOC_A|S requests: 1) After we store data to new place, but before updating BAT on disk, we have FLUSH everything (in fact, flushing those data would be enough, but it is simplier to flush everything). 2) We should not proceed handling RELOC_A|S until we 100% sure new BAT value went to disk platters. So far as new BAT is only one page, it's OK to mark corresponding bio with FUA flag for io_direct case. For io_kaio, not having FUA api, we have to post_fsync BAT update. PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA introduced long time ago probably were intended to ensure the logic above, but they actually didn't. The patch removes PLOOP_REQ_FORCE_FLUSH/PLOOP_REQ_FORCE_FUA, and implements barriers in a straightforward and simple way: check for RELOC_A|S explicitly and make FLUSH/FUA where needed. Signed-off-by: Maxim Patlasov <[email protected]> --- drivers/block/ploop/dev.c | 4 ++-- drivers/block/ploop/io_direct.c | 7 ------- drivers/block/ploop/io_kaio.c | 8 +++++--- drivers/block/ploop/map.c | 22 ++++++++++------------ include/linux/ploop/ploop.h | 1 - 5 files changed, 17 insertions(+), 25 deletions(-) diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c index 2b60dfa..40768b6 100644 --- a/drivers/block/ploop/dev.c +++ b/drivers/block/ploop/dev.c @@ -2610,8 +2610,8 @@ restart: top_delta = ploop_top_delta(plo); sbl.head = sbl.tail = preq->aux_bio; - /* Relocated data write required sync before BAT updatee */ - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); + /* Relocated data write required sync before BAT update + * this will happen inside index_update */ if (test_bit(PLOOP_REQ_RELOC_S, &preq->state)) { preq->eng_state = PLOOP_E_DATA_WBI; diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c index c4d0f63..266f041 100644 --- a/drivers/block/ploop/io_direct.c +++ b/drivers/block/ploop/io_direct.c @@ -89,15 +89,11 @@ dio_submit(struct ploop_io *io, struct ploop_request * preq, sector_t sec, nsec; int err; struct bio_list_walk bw; - int postfua = 0; int write = !!(rw & REQ_WRITE); int delayed_fua = 0; trace_submit(preq); - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state)) - postfua = 1; - if ((rw & REQ_FUA) && ploop_req_delay_fua_possible(preq)) { /* Mark req that delayed flush required */ preq->req_rw |= (REQ_FLUSH | REQ_FUA); @@ -233,9 +229,6 @@ flush_bio: b->bi_private = preq; b->bi_end_io = dio_endio_async; - if (unlikely(postfua && !bl.head)) - rw2 |= REQ_FUA; - ploop_acc_ff_out(preq->plo, rw2 | b->bi_rw); submit_bio(rw2, b); } diff --git a/drivers/block/ploop/io_kaio.c b/drivers/block/ploop/io_kaio.c index ed550f4..85863df 100644 --- a/drivers/block/ploop/io_kaio.c +++ b/drivers/block/ploop/io_kaio.c @@ -69,6 +69,8 @@ static void kaio_complete_io_state(struct ploop_request * preq) unsigned long flags; int post_fsync = 0; int need_fua = !!(preq->req_rw & REQ_FUA); + unsigned long state = READ_ONCE(preq->state); + int reloc = !!(state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)); if (preq->error || !(preq->req_rw & REQ_FUA) || preq->eng_state == PLOOP_E_INDEX_READ || @@ -80,9 +82,9 @@ static void kaio_complete_io_state(struct ploop_request * preq) } /* Convert requested fua to fsync */ - if (test_and_clear_bit(PLOOP_REQ_FORCE_FUA, &preq->state) || - test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) || - (need_fua && !ploop_req_delay_fua_possible(preq))) { + if (test_and_clear_bit(PLOOP_REQ_KAIO_FSYNC, &preq->state) || + (need_fua && !ploop_req_delay_fua_possible(preq)) || + (reloc && ploop_req_delay_fua_possible(preq))) { post_fsync = 1; preq->req_rw &= ~REQ_FUA; } diff --git a/drivers/block/ploop/map.c b/drivers/block/ploop/map.c index 915a216..1883674 100644 --- a/drivers/block/ploop/map.c +++ b/drivers/block/ploop/map.c @@ -909,6 +909,7 @@ void ploop_index_update(struct ploop_request * preq) struct page * page; sector_t sec; unsigned long rw; + unsigned long state = READ_ONCE(preq->state); /* No way back, we are going to initiate index write. */ @@ -961,10 +962,6 @@ void ploop_index_update(struct ploop_request * preq) __TRACE("wbi %p %u %p\n", preq, preq->req_cluster, m); plo->st.map_single_writes++; top_delta->ops->map_index(top_delta, m->mn_start, &sec); - /* Relocate requires consistent writes, mark such reqs appropriately */ - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) || - test_bit(PLOOP_REQ_RELOC_S, &preq->state)) - set_bit(PLOOP_REQ_FORCE_FUA, &preq->state); rw = (preq->req_rw & (REQ_FUA | REQ_FLUSH)); @@ -972,6 +969,10 @@ void ploop_index_update(struct ploop_request * preq) will do the FLUSH */ preq->req_rw &= ~REQ_FLUSH; + /* Relocate requires consistent index update */ + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) + rw |= (REQ_FLUSH | REQ_FUA); + top_delta->io.ops->write_page(&top_delta->io, preq, page, sec, rw); put_page(page); @@ -1094,7 +1095,6 @@ static void map_wb_complete(struct map_node * m, int err) int delayed = 0; unsigned int idx; sector_t sec; - int force_fua; unsigned long rw; /* First, complete processing of written back indices, @@ -1166,10 +1166,10 @@ static void map_wb_complete(struct map_node * m, int err) main_preq = NULL; rw = 0; - force_fua = 0; list_for_each_safe(cursor, tmp, &m->io_queue) { struct ploop_request * preq; + unsigned long state; preq = list_entry(cursor, struct ploop_request, list); @@ -1191,9 +1191,10 @@ static void map_wb_complete(struct map_node * m, int err) will do the FLUSH */ preq->req_rw &= ~REQ_FLUSH; - if (test_bit(PLOOP_REQ_RELOC_A, &preq->state) || - test_bit(PLOOP_REQ_RELOC_S, &preq->state)) - force_fua = 1; + state = READ_ONCE(preq->state); + /* Relocate requires consistent index update */ + if (state & (PLOOP_REQ_RELOC_A_FL|PLOOP_REQ_RELOC_S_FL)) + rw |= (REQ_FLUSH | REQ_FUA); preq->eng_state = PLOOP_E_INDEX_WB; get_page(page); @@ -1220,9 +1221,6 @@ static void map_wb_complete(struct map_node * m, int err) plo->st.map_multi_writes++; top_delta->ops->map_index(top_delta, m->mn_start, &sec); - if (force_fua) - set_bit(PLOOP_REQ_FORCE_FUA, &main_preq->state); - top_delta->io.ops->write_page(&top_delta->io, main_preq, page, sec, rw); put_page(page); diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h index 920daf7..6f41570 100644 --- a/include/linux/ploop/ploop.h +++ b/include/linux/ploop/ploop.h @@ -474,7 +474,6 @@ enum PLOOP_REQ_ZERO, PLOOP_REQ_DISCARD, PLOOP_REQ_RSYNC, - PLOOP_REQ_FORCE_FUA, /*force fua of req write I/O by engine */ PLOOP_REQ_KAIO_FSYNC, /*force image fsync by KAIO module */ PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */ PLOOP_REQ_PUSH_BACKUP, /* preq was ACKed by userspace push_backup */ _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
