Maxim Patlasov <[email protected]> writes:
> The patch implements an optimization of submit_alloc path for pio_direct: > write user data directly to host block-device (according to fiemap info) > and then use fallocate(FALLOC_FL_CONVERT_UNWRITTEN). This avoids expensive > pagecache_write_begin/copy/pagecache_write_end mechanism (saves ~750usec > per megabyte in my experiments). The feature improves performance > significantly. Before the patch: Acked-by: Dmitry Monakhov <[email protected]> > > # dd if=/dev/zero of=/mnt2/sb-io-test bs=1M count=10k oflag=dsync > 10240+0 records in > 10240+0 records out > 10737418240 bytes (11 GB) copied, 44.2684 s, 243 MB/s > > after the patch: > > # dd if=/dev/zero of=/mnt2/sb-io-test bs=1M count=10k oflag=dsync > 10240+0 records in > 10240+0 records out > 10737418240 bytes (11 GB) copied, 29.3066 s, 366 MB/s > > https://jira.sw.ru/browse/PSBM-22381 > > Signed-off-by: Maxim Patlasov <[email protected]> > --- > drivers/block/ploop/dev.c | 7 +++++++ > drivers/block/ploop/io_direct.c | 42 > ++++++++++++++++++++++++++++++++++++--- > include/linux/ploop/ploop.h | 6 ++++++ > 3 files changed, 52 insertions(+), 3 deletions(-) > > diff --git a/drivers/block/ploop/dev.c b/drivers/block/ploop/dev.c > index feb56c7..1da073c 100644 > --- a/drivers/block/ploop/dev.c > +++ b/drivers/block/ploop/dev.c > @@ -2294,7 +2294,14 @@ static void ploop_req_state_process(struct > ploop_request * preq) > preq->prealloc_size = 0; /* only for sanity */ > } > > + if (test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state)) { > + preq->eng_io->ops->post_submit(preq->eng_io, preq); > + clear_bit(PLOOP_REQ_POST_SUBMIT, &preq->state); > + preq->eng_io = NULL; > + } > + > restart: > + BUG_ON(test_bit(PLOOP_REQ_POST_SUBMIT, &preq->state)); > __TRACE("ST %p %u %lu\n", preq, preq->req_cluster, preq->eng_state); > switch (preq->eng_state) { > case PLOOP_E_ENTRY: > diff --git a/drivers/block/ploop/io_direct.c b/drivers/block/ploop/io_direct.c > index 514af4b..d6f0f21 100644 > --- a/drivers/block/ploop/io_direct.c > +++ 
b/drivers/block/ploop/io_direct.c > @@ -359,6 +359,10 @@ static inline void bzero_page(struct page *page) > kunmap_atomic(kaddr); > } > > +static void > +dio_submit_pad(struct ploop_io *io, struct ploop_request * preq, > + struct bio_list * sbl, unsigned int size, > + struct extent_map *em); > > static int > cached_submit(struct ploop_io *io, iblock_t iblk, struct ploop_request * > preq, > @@ -371,6 +375,8 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct > ploop_request * preq, > struct bio_iter biter; > loff_t new_size; > loff_t used_pos; > + bool may_fallocate = io->files.file->f_op->fallocate && > + io->files.flags & EXT4_EXTENTS_FL; > > trace_cached_submit(preq); > > @@ -379,9 +385,7 @@ cached_submit(struct ploop_io *io, iblock_t iblk, struct > ploop_request * preq, > used_pos = (io->alloc_head - 1) << (io->plo->cluster_log + 9); > > #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) > - if (use_prealloc && end_pos > used_pos && > - io->files.file->f_op->fallocate && > - io->files.flags & EXT4_EXTENTS_FL) { > + if (use_prealloc && end_pos > used_pos && may_fallocate) { > if (unlikely(io->prealloced_size < clu_siz)) { > loff_t prealloc = end_pos; > if (prealloc > PLOOP_MAX_PREALLOC(plo)) > @@ -405,6 +409,21 @@ try_again: > } > #endif > > + if (may_fallocate) { > + sector_t sec = (sector_t)iblk << preq->plo->cluster_log; > + sector_t len = 1 << preq->plo->cluster_log; > + struct extent_map * em = extent_lookup_create(io, sec, len); > + > + if (unlikely(IS_ERR(em))) > + return PTR_ERR(em); > + > + preq->iblock = iblk; > + preq->eng_io = io; > + set_bit(PLOOP_REQ_POST_SUBMIT, &preq->state); > + dio_submit_pad(io, preq, sbl, size, em); > + return 0; > + } > + > bio_iter_init(&biter, sbl); > mutex_lock(&io->files.inode->i_mutex); > > @@ -480,6 +499,22 @@ try_again: > return err; > } > > +static void > +dio_post_submit(struct ploop_io *io, struct ploop_request * preq) > +{ > + sector_t sec = (sector_t)preq->iblock << preq->plo->cluster_log; > + loff_t clu_siz 
= 1 << (preq->plo->cluster_log + 9); > + int err; > + > + err = io->files.file->f_op->fallocate(io->files.file, > + FALLOC_FL_CONVERT_UNWRITTEN, > + (loff_t)sec << 9, clu_siz); > + if (err) { > + PLOOP_REQ_SET_ERROR(preq, err); > + set_bit(PLOOP_S_ABORT, &preq->plo->state); > + } > +} > + > /* Submit the whole cluster. If preq contains only partial data > * within the cluster, pad the rest of cluster with zeros. > */ > @@ -1854,6 +1889,7 @@ static struct ploop_io_ops ploop_io_ops_direct = > .alloc = dio_alloc_sync, > .submit = dio_submit, > .submit_alloc = dio_submit_alloc, > + .post_submit = dio_post_submit, > .disable_merge = dio_disable_merge, > .fastmap = dio_fastmap, > .read_page = dio_read_page, > diff --git a/include/linux/ploop/ploop.h b/include/linux/ploop/ploop.h > index b8c7130..c9fb1b0 100644 > --- a/include/linux/ploop/ploop.h > +++ b/include/linux/ploop/ploop.h > @@ -142,6 +142,7 @@ struct ploop_io_ops > struct bio_list *sbl, iblock_t iblk, unsigned int > size); > void (*submit_alloc)(struct ploop_io *, struct ploop_request *, > struct bio_list *sbl, unsigned int size); > + void (*post_submit)(struct ploop_io *, struct ploop_request *); > > int (*disable_merge)(struct ploop_io * io, sector_t isector, > unsigned int len); > int (*fastmap)(struct ploop_io * io, struct bio *orig_bio, > @@ -459,6 +460,7 @@ enum > PLOOP_REQ_FORCE_FUA, /*force fua of req write I/O by engine */ > PLOOP_REQ_FORCE_FLUSH, /*force flush by engine */ > PLOOP_REQ_KAIO_FSYNC, /*force image fsync by KAIO module */ > + PLOOP_REQ_POST_SUBMIT, /* preq needs post_submit processing */ > }; > > enum > @@ -561,6 +563,10 @@ struct ploop_request > > /* # bytes in tail of image file to prealloc on behalf of this preq */ > loff_t prealloc_size; > + > + /* if the engine starts operation on particular io, let's finish > + * the operation on the same io (see io.ops->post_submit) */ > + struct ploop_io *eng_io; > }; > > static inline struct ploop_delta * ploop_top_delta(struct ploop_device * plo)
signature.asc
Description: PGP signature
_______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
