Re: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
On Fri, Nov 02, 2018 at 10:00:59AM +0000, Tim Smith wrote:
> When I/O consists of many small requests, performance is improved by
> batching them together in a single io_submit() call. When there are
> relatively few requests, the extra overhead is not worth it. This
> introduces a check to start batching I/O requests via blk_io_plug()/
> blk_io_unplug() in an amount proportional to the number which were
> already in flight at the time we started reading the ring.
>
> Signed-off-by: Tim Smith

Acked-by: Anthony PERARD

--
Anthony PERARD
Re: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
> -----Original Message-----
> From: Tim Smith [mailto:tim.sm...@citrix.com]
> Sent: 02 November 2018 10:01
> To: xen-de...@lists.xenproject.org; qemu-devel@nongnu.org; qemu-bl...@nongnu.org
> Cc: Anthony Perard; Kevin Wolf; Paul Durrant; Stefano Stabellini; Max Reitz
> Subject: [PATCH 1/3] Improve xen_disk batching behaviour
>
> When I/O consists of many small requests, performance is improved by
> batching them together in a single io_submit() call. When there are
> relatively few requests, the extra overhead is not worth it. This
> introduces a check to start batching I/O requests via blk_io_plug()/
> blk_io_unplug() in an amount proportional to the number which were
> already in flight at the time we started reading the ring.
>
> Signed-off-by: Tim Smith

Reviewed-by: Paul Durrant

> ---
>  hw/block/xen_disk.c | 30 ++++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
>
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 36eff94f84..cb2881b7e6 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -101,6 +101,9 @@ struct XenBlkDev {
>      AioContext *ctx;
>  };
>
> +/* Threshold of in-flight requests above which we will start using
> + * blk_io_plug()/blk_io_unplug() to batch requests */
> +#define IO_PLUG_THRESHOLD 1
>  /* ----------------------------------------------------------------- */
>
>  static void ioreq_reset(struct ioreq *ioreq)
> @@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>  {
>      RING_IDX rc, rp;
>      struct ioreq *ioreq;
> +    int inflight_atstart = blkdev->requests_inflight;
> +    int batched = 0;
>
>      blkdev->more_work = 0;
>
> @@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>      xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
>
>      blk_send_response_all(blkdev);
> +    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
> +     * when we got here, this is an indication that the bottleneck
> +     * is below us, so it's worth beginning to batch up I/O requests
> +     * rather than submitting them immediately. The maximum number
> +     * of requests we're willing to batch is the number already in
> +     * flight, so it can grow up to max_requests when the bottleneck
> +     * is below us */
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_plug(blkdev->blk);
> +    }
>      while (rc != rp) {
>          /* pull request from ring */
>          if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
> @@ -589,7 +604,22 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>              continue;
>          }
>
> +        if (inflight_atstart > IO_PLUG_THRESHOLD &&
> +            batched >= inflight_atstart) {
> +            blk_io_unplug(blkdev->blk);
> +        }
>          ioreq_runio_qemu_aio(ioreq);
> +        if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +            if (batched >= inflight_atstart) {
> +                blk_io_plug(blkdev->blk);
> +                batched = 0;
> +            } else {
> +                batched++;
> +            }
> +        }
> +    }
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_unplug(blkdev->blk);
>      }
>
>      if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {
[Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
When I/O consists of many small requests, performance is improved by
batching them together in a single io_submit() call. When there are
relatively few requests, the extra overhead is not worth it. This
introduces a check to start batching I/O requests via blk_io_plug()/
blk_io_unplug() in an amount proportional to the number which were
already in flight at the time we started reading the ring.

Signed-off-by: Tim Smith
---
 hw/block/xen_disk.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36eff94f84..cb2881b7e6 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -101,6 +101,9 @@ struct XenBlkDev {
     AioContext *ctx;
 };

+/* Threshold of in-flight requests above which we will start using
+ * blk_io_plug()/blk_io_unplug() to batch requests */
+#define IO_PLUG_THRESHOLD 1
 /* ----------------------------------------------------------------- */

 static void ioreq_reset(struct ioreq *ioreq)
@@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
 {
     RING_IDX rc, rp;
     struct ioreq *ioreq;
+    int inflight_atstart = blkdev->requests_inflight;
+    int batched = 0;

     blkdev->more_work = 0;

@@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

     blk_send_response_all(blkdev);
+    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
+     * when we got here, this is an indication that the bottleneck
+     * is below us, so it's worth beginning to batch up I/O requests
+     * rather than submitting them immediately. The maximum number
+     * of requests we're willing to batch is the number already in
+     * flight, so it can grow up to max_requests when the bottleneck
+     * is below us */
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_plug(blkdev->blk);
+    }
     while (rc != rp) {
         /* pull request from ring */
         if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
@@ -589,7 +604,22 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
             continue;
         }

+        if (inflight_atstart > IO_PLUG_THRESHOLD &&
+            batched >= inflight_atstart) {
+            blk_io_unplug(blkdev->blk);
+        }
         ioreq_runio_qemu_aio(ioreq);
+        if (inflight_atstart > IO_PLUG_THRESHOLD) {
+            if (batched >= inflight_atstart) {
+                blk_io_plug(blkdev->blk);
+                batched = 0;
+            } else {
+                batched++;
+            }
+        }
+    }
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_unplug(blkdev->blk);
     }

     if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {
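For context: blk_io_plug() and blk_io_unplug() bracket a window in which the
QEMU block layer may coalesce queued AIO submissions, so a backend such as
Linux AIO can hand them to the kernel together rather than one at a time. The
sketch below is only an illustration of that pattern using the same threshold
idea as the patch; submit_ioreq() and num_pending are hypothetical stand-ins,
not functions or fields from xen_disk.c.

    /* Illustrative sketch only, not code from the patch: batch a burst of
     * requests between blk_io_plug()/blk_io_unplug() once the queue is
     * already deep. */
    static void submit_burst(BlockBackend *blk, int num_pending)
    {
        bool plugged = false;
        int i;

        if (num_pending > 1) {          /* mirrors IO_PLUG_THRESHOLD */
            blk_io_plug(blk);           /* start coalescing submissions */
            plugged = true;
        }

        for (i = 0; i < num_pending; i++) {
            submit_ioreq(blk, i);       /* e.g. blk_aio_preadv()/blk_aio_pwritev() */
        }

        if (plugged) {
            blk_io_unplug(blk);         /* flush the whole batch to the host */
        }
    }

The patch applies the same idea inside blk_handle_requests(): it re-plugs after
every inflight_atstart submissions, so the batch size tracks the queue depth
seen when the ring was read and can grow up to max_requests.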
Re: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
Hi Tim,

I'll reply here for the series (since there is no cover letter).

Thanks, the patches look good. There are just a few details that need to be
fixed.

When resubmitting the patches, could you CC the relevant maintainers (the
script get_maintainer.pl will find them) and add a cover letter, as it is
used for automatic checking? You can find more information here:
https://wiki.qemu.org/Contribute/SubmitAPatch#Submitting_your_Patches

The patches have a few coding style issues, listed below. Can you fix them
and resend the patches? Thanks.

$ ./scripts/checkpatch.pl 000*

WARNING: line over 80 characters
#62: FILE: hw/block/xen_disk.c:607:
+        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) {

ERROR: spaces required around that '=' (ctx:VxV)
#69: FILE: hw/block/xen_disk.c:614:
+                batched=0;
                        ^

total: 1 errors, 1 warnings, 54 lines checked

0001-Improve-xen_disk-batching-behaviour.patch has style problems, please
review. If any of these errors are false positives report them to the
maintainer, see CHECKPATCH in MAINTAINERS.

total: 0 errors, 0 warnings, 111 lines checked

0002-Improve-xen_disk-response-latency.patch has no obvious style problems
and is ready for submission.

WARNING: line over 80 characters
#38: FILE: hw/block/xen_disk.c:139:
+    /* We cannot need more pages per ioreq than this, and we do re-use ioreqs,

ERROR: line over 90 characters
#41: FILE: hw/block/xen_disk.c:142:
+    ioreq->buf = qemu_memalign(XC_PAGE_SIZE, BLKIF_MAX_SEGMENTS_PER_REQUEST * XC_PAGE_SIZE);

total: 1 errors, 1 warnings, 50 lines checked

0003-Avoid-repeated-memory-allocation-in-xen_disk.patch has style problems,
please review. If any of these errors are false positives report them to the
maintainer, see CHECKPATCH in MAINTAINERS.

Cheers,

--
Anthony PERARD
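For anyone following along, both scripts live in the QEMU source tree and the
cover letter can be generated by git itself. A typical pre-resubmission pass
(the patch file name shown is illustrative) looks like:

$ ./scripts/checkpatch.pl 0001-Improve-xen_disk-batching-behaviour.patch
$ ./scripts/get_maintainer.pl 0001-Improve-xen_disk-batching-behaviour.patch
$ git format-patch -3 -v2 --cover-letter

git send-email can then be pointed at the resulting files, and its --cc-cmd
option can run get_maintainer.pl automatically to build the Cc list.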
Re: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
> -----Original Message-----
> From: Qemu-devel [mailto:qemu-devel-bounces+paul.durrant=citrix@nongnu.org] On Behalf Of Tim Smith
> Sent: 07 September 2018 11:21
> To: qemu-devel@nongnu.org
> Subject: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
>
> When I/O consists of many small requests, performance is improved by
> batching them together in a single io_submit() call. When there are
> relatively few requests, the extra overhead is not worth it. This
> introduces a check to start batching I/O requests via blk_io_plug()/
> blk_io_unplug() in an amount proportional to the number which were
> already in flight at the time we started reading the ring.
>
> Signed-off-by: Tim Smith

Reviewed-by: Paul Durrant

> ---
>  hw/block/xen_disk.c | 29 +++++++++++++++++++++++++++++
>  1 file changed, 29 insertions(+)
>
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 36eff94f84..6cb40d66fa 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -101,6 +101,9 @@ struct XenBlkDev {
>      AioContext *ctx;
>  };
>
> +/* Threshold of in-flight requests above which we will start using
> + * blk_io_plug()/blk_io_unplug() to batch requests */
> +#define IO_PLUG_THRESHOLD 1
>  /* ----------------------------------------------------------------- */
>
>  static void ioreq_reset(struct ioreq *ioreq)
> @@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>  {
>      RING_IDX rc, rp;
>      struct ioreq *ioreq;
> +    int inflight_atstart = blkdev->requests_inflight;
> +    int batched = 0;
>
>      blkdev->more_work = 0;
>
> @@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>      xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
>
>      blk_send_response_all(blkdev);
> +    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
> +     * when we got here, this is an indication that the bottleneck
> +     * is below us, so it's worth beginning to batch up I/O requests
> +     * rather than submitting them immediately. The maximum number
> +     * of requests we're willing to batch is the number already in
> +     * flight, so it can grow up to max_requests when the bottleneck
> +     * is below us */
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_plug(blkdev->blk);
> +    }
>      while (rc != rp) {
>          /* pull request from ring */
>          if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
> @@ -589,7 +604,21 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>              continue;
>          }
>
> +        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) {
> +            blk_io_unplug(blkdev->blk);
> +        }
>          ioreq_runio_qemu_aio(ioreq);
> +        if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +            if (batched >= inflight_atstart) {
> +                blk_io_plug(blkdev->blk);
> +                batched=0;
> +            } else {
> +                batched++;
> +            }
> +        }
> +    }
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_unplug(blkdev->blk);
>      }
>
>      if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {
[Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
When I/O consists of many small requests, performance is improved by
batching them together in a single io_submit() call. When there are
relatively few requests, the extra overhead is not worth it. This
introduces a check to start batching I/O requests via blk_io_plug()/
blk_io_unplug() in an amount proportional to the number which were
already in flight at the time we started reading the ring.

Signed-off-by: Tim Smith
---
 hw/block/xen_disk.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36eff94f84..6cb40d66fa 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -101,6 +101,9 @@ struct XenBlkDev {
     AioContext *ctx;
 };

+/* Threshold of in-flight requests above which we will start using
+ * blk_io_plug()/blk_io_unplug() to batch requests */
+#define IO_PLUG_THRESHOLD 1
 /* ----------------------------------------------------------------- */

 static void ioreq_reset(struct ioreq *ioreq)
@@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
 {
     RING_IDX rc, rp;
     struct ioreq *ioreq;
+    int inflight_atstart = blkdev->requests_inflight;
+    int batched = 0;

     blkdev->more_work = 0;

@@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */

     blk_send_response_all(blkdev);
+    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
+     * when we got here, this is an indication that the bottleneck
+     * is below us, so it's worth beginning to batch up I/O requests
+     * rather than submitting them immediately. The maximum number
+     * of requests we're willing to batch is the number already in
+     * flight, so it can grow up to max_requests when the bottleneck
+     * is below us */
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_plug(blkdev->blk);
+    }
     while (rc != rp) {
         /* pull request from ring */
         if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
@@ -589,7 +604,21 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
             continue;
         }

+        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) {
+            blk_io_unplug(blkdev->blk);
+        }
         ioreq_runio_qemu_aio(ioreq);
+        if (inflight_atstart > IO_PLUG_THRESHOLD) {
+            if (batched >= inflight_atstart) {
+                blk_io_plug(blkdev->blk);
+                batched=0;
+            } else {
+                batched++;
+            }
+        }
+    }
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_unplug(blkdev->blk);
     }

     if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {
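To illustrate the claim in the commit message that many small requests benefit
from reaching the kernel in a single io_submit() call, here is a small
self-contained libaio example. It is not QEMU code; the file path, request
count and block size are arbitrary placeholders chosen for the demonstration.

    /* Build with: gcc -O2 batch_demo.c -laio
     * Reads NREQ small blocks from a placeholder file, handing all of them
     * to the kernel with one io_submit() call instead of NREQ calls. */
    #define _GNU_SOURCE
    #include <libaio.h>
    #include <fcntl.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define NREQ 16
    #define BLK  4096

    int main(void)
    {
        io_context_t ctx = 0;
        struct iocb iocbs[NREQ], *ptrs[NREQ];
        struct io_event events[NREQ];
        int fd, i;

        if (io_setup(NREQ, &ctx) < 0) {
            perror("io_setup");
            return 1;
        }
        fd = open("/tmp/batch-demo.dat", O_RDONLY | O_DIRECT);
        if (fd < 0) {
            perror("open");
            return 1;
        }
        for (i = 0; i < NREQ; i++) {
            void *buf;
            if (posix_memalign(&buf, BLK, BLK)) {
                return 1;
            }
            io_prep_pread(&iocbs[i], fd, buf, BLK, (long long)i * BLK);
            ptrs[i] = &iocbs[i];
        }
        /* One system call submits the whole batch of NREQ requests. */
        if (io_submit(ctx, NREQ, ptrs) != NREQ) {
            fprintf(stderr, "io_submit failed\n");
            return 1;
        }
        if (io_getevents(ctx, NREQ, NREQ, events, NULL) != NREQ) {
            fprintf(stderr, "io_getevents failed\n");
            return 1;
        }
        printf("completed %d reads with a single io_submit()\n", NREQ);
        io_destroy(ctx);
        return 0;
    }

blk_io_plug()/blk_io_unplug() let xen_disk obtain this effect through QEMU's
block layer instead of calling libaio directly: submissions made while the
BlockBackend is plugged can be flushed to the host together on unplug.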