Re: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour

2018-11-02 Thread Anthony PERARD
On Fri, Nov 02, 2018 at 10:00:59AM +, Tim Smith wrote:
> When I/O consists of many small requests, performance is improved by
> batching them together in a single io_submit() call. When there are
> relatively few requests, the extra overhead is not worth it. This
> introduces a check to start batching I/O requests via blk_io_plug()/
> blk_io_unplug() in an amount proportional to the number which were
> already in flight at the time we started reading the ring.
> 
> Signed-off-by: Tim Smith 

Acked-by: Anthony PERARD 

-- 
Anthony PERARD



Re: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour

2018-11-02 Thread Paul Durrant
> -----Original Message-----
> From: Tim Smith [mailto:tim.sm...@citrix.com]
> Sent: 02 November 2018 10:01
> To: xen-de...@lists.xenproject.org; qemu-devel@nongnu.org; qemu-
> bl...@nongnu.org
> Cc: Anthony Perard ; Kevin Wolf
> ; Paul Durrant ; Stefano
> Stabellini ; Max Reitz 
> Subject: [PATCH 1/3] Improve xen_disk batching behaviour
> 
> When I/O consists of many small requests, performance is improved by
> batching them together in a single io_submit() call. When there are
> relatively few requests, the extra overhead is not worth it. This
> introduces a check to start batching I/O requests via blk_io_plug()/
> blk_io_unplug() in an amount proportional to the number which were
> already in flight at the time we started reading the ring.
> 
> Signed-off-by: Tim Smith 

Reviewed-by: Paul Durrant 

> ---
>  hw/block/xen_disk.c |   30 ++++++++++++++++++++++++++++++
>  1 file changed, 30 insertions(+)
> 
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 36eff94f84..cb2881b7e6 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -101,6 +101,9 @@ struct XenBlkDev {
>      AioContext  *ctx;
>  };
> 
> +/* Threshold of in-flight requests above which we will start using
> + * blk_io_plug()/blk_io_unplug() to batch requests */
> +#define IO_PLUG_THRESHOLD 1
>  /* ------------------------------------------------------------- */
> 
>  static void ioreq_reset(struct ioreq *ioreq)
> @@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>  {
>      RING_IDX rc, rp;
>      struct ioreq *ioreq;
> +    int inflight_atstart = blkdev->requests_inflight;
> +    int batched = 0;
> 
>      blkdev->more_work = 0;
> 
> @@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>      xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
> 
>      blk_send_response_all(blkdev);
> +    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
> +     * when we got here, this is an indication that the bottleneck
> +     * is below us, so it's worth beginning to batch up I/O requests
> +     * rather than submitting them immediately. The maximum number
> +     * of requests we're willing to batch is the number already in
> +     * flight, so it can grow up to max_requests when the bottleneck
> +     * is below us */
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_plug(blkdev->blk);
> +    }
>      while (rc != rp) {
>          /* pull request from ring */
>          if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
> @@ -589,7 +604,22 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>              continue;
>          }
> 
> +        if (inflight_atstart > IO_PLUG_THRESHOLD &&
> +            batched >= inflight_atstart) {
> +            blk_io_unplug(blkdev->blk);
> +        }
>          ioreq_runio_qemu_aio(ioreq);
> +        if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +            if (batched >= inflight_atstart) {
> +                blk_io_plug(blkdev->blk);
> +                batched = 0;
> +            } else {
> +                batched++;
> +            }
> +        }
> +    }
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_unplug(blkdev->blk);
>      }
> 
>      if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {



[Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour

2018-11-02 Thread Tim Smith
When I/O consists of many small requests, performance is improved by
batching them together in a single io_submit() call. When there are
relatively few requests, the extra overhead is not worth it. This
introduces a check to start batching I/O requests via blk_io_plug()/
blk_io_unplug() in an amount proportional to the number which were
already in flight at the time we started reading the ring.

Signed-off-by: Tim Smith 
---
 hw/block/xen_disk.c |   30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36eff94f84..cb2881b7e6 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -101,6 +101,9 @@ struct XenBlkDev {
     AioContext  *ctx;
 };
 
+/* Threshold of in-flight requests above which we will start using
+ * blk_io_plug()/blk_io_unplug() to batch requests */
+#define IO_PLUG_THRESHOLD 1
 /* ------------------------------------------------------------- */
 
 static void ioreq_reset(struct ioreq *ioreq)
@@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
 {
     RING_IDX rc, rp;
     struct ioreq *ioreq;
+    int inflight_atstart = blkdev->requests_inflight;
+    int batched = 0;
 
     blkdev->more_work = 0;
 
@@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 
     blk_send_response_all(blkdev);
+    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
+     * when we got here, this is an indication that the bottleneck
+     * is below us, so it's worth beginning to batch up I/O requests
+     * rather than submitting them immediately. The maximum number
+     * of requests we're willing to batch is the number already in
+     * flight, so it can grow up to max_requests when the bottleneck
+     * is below us */
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_plug(blkdev->blk);
+    }
     while (rc != rp) {
         /* pull request from ring */
         if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
@@ -589,7 +604,22 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
             continue;
         }
 
+        if (inflight_atstart > IO_PLUG_THRESHOLD &&
+            batched >= inflight_atstart) {
+            blk_io_unplug(blkdev->blk);
+        }
         ioreq_runio_qemu_aio(ioreq);
+        if (inflight_atstart > IO_PLUG_THRESHOLD) {
+            if (batched >= inflight_atstart) {
+                blk_io_plug(blkdev->blk);
+                batched = 0;
+            } else {
+                batched++;
+            }
+        }
+    }
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_unplug(blkdev->blk);
     }
 
     if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {
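
To see the heuristic in isolation: the following is a minimal, self-contained sketch of the plug/unplug batching pattern this patch adds to blk_handle_requests(). The backend_plug(), backend_unplug() and submit_request() stubs here are hypothetical stand-ins for QEMU's blk_io_plug(), blk_io_unplug() and ioreq_runio_qemu_aio(); only the control flow mirrors the patch.

#include <stdio.h>

#define IO_PLUG_THRESHOLD 1   /* same threshold the patch uses */

/* Hypothetical stand-ins for blk_io_plug()/blk_io_unplug()/ioreq_runio_qemu_aio(). */
static void backend_plug(void)      { printf("plug\n"); }
static void backend_unplug(void)    { printf("unplug: flush the current batch\n"); }
static void submit_request(int req) { printf("submit request %d\n", req); }

/*
 * Drain 'pending' queued requests. Batching is only used when more than
 * IO_PLUG_THRESHOLD requests were already in flight when we started, and
 * the batch is flushed each time it reaches that in-flight count.
 */
static void handle_requests(int pending, int inflight_atstart)
{
    int batched = 0;
    int req;

    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        backend_plug();
    }
    for (req = 0; req < pending; req++) {
        if (inflight_atstart > IO_PLUG_THRESHOLD &&
            batched >= inflight_atstart) {
            backend_unplug();
        }
        submit_request(req);
        if (inflight_atstart > IO_PLUG_THRESHOLD) {
            if (batched >= inflight_atstart) {
                backend_plug();   /* start a new batch */
                batched = 0;
            } else {
                batched++;
            }
        }
    }
    if (inflight_atstart > IO_PLUG_THRESHOLD) {
        backend_unplug();         /* flush whatever is left */
    }
}

int main(void)
{
    /* e.g. 10 ring entries to submit, 4 requests already in flight */
    handle_requests(10, 4);
    return 0;
}

With, say, four requests already in flight, up to four new submissions are accumulated before each unplug, so the batch size tracks the queue depth the lower layers have already shown they can absorb.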




[Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour

2018-11-02 Thread Tim Smith
When I/O consists of many small requests, performance is improved by
batching them together in a single io_submit() call. When there are
relatively few requests, the extra overhead is not worth it. This
introduces a check to start batching I/O requests via blk_io_plug()/
blk_io_unplug() in an amount proportional to the number which were
already in flight at the time we started reading the ring.

Signed-off-by: Tim Smith 
---
 hw/block/xen_disk.c |   29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36eff94f84..6cb40d66fa 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -101,6 +101,9 @@ struct XenBlkDev {
     AioContext  *ctx;
 };
 
+/* Threshold of in-flight requests above which we will start using
+ * blk_io_plug()/blk_io_unplug() to batch requests */
+#define IO_PLUG_THRESHOLD 1
 /* ------------------------------------------------------------- */
 
 static void ioreq_reset(struct ioreq *ioreq)
@@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
 {
     RING_IDX rc, rp;
     struct ioreq *ioreq;
+    int inflight_atstart = blkdev->requests_inflight;
+    int batched = 0;
 
     blkdev->more_work = 0;
 
@@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 
     blk_send_response_all(blkdev);
+    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
+     * when we got here, this is an indication that the bottleneck
+     * is below us, so it's worth beginning to batch up I/O requests
+     * rather than submitting them immediately. The maximum number
+     * of requests we're willing to batch is the number already in
+     * flight, so it can grow up to max_requests when the bottleneck
+     * is below us */
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_plug(blkdev->blk);
+    }
     while (rc != rp) {
         /* pull request from ring */
         if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
@@ -589,7 +604,21 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
             continue;
         }
 
+        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) {
+            blk_io_unplug(blkdev->blk);
+        }
         ioreq_runio_qemu_aio(ioreq);
+        if (inflight_atstart > IO_PLUG_THRESHOLD) {
+            if (batched >= inflight_atstart) {
+                blk_io_plug(blkdev->blk);
+                batched=0;
+            } else {
+                batched++;
+            }
+        }
+    }
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_unplug(blkdev->blk);
     }
 
     if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {




Re: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour

2018-09-18 Thread Anthony PERARD
Hi Tim,

I'll reply here for the series (since there is no cover letter).

Thanks, the patches look good. There are just a few details that need to be
fixed.

When resubmitting the patches, could you CC the relevant maintainers
(the script get_maintainer.pl can help with that) and add a cover letter,
as it is used for automatic checking. You can find more information here:
https://wiki.qemu.org/Contribute/SubmitAPatch#Submitting_your_Patches

The patches have a few coding style issues, listed below; can you fix
them and resend the patches? Thanks.


$ ./scripts/checkpatch.pl 000*
WARNING: line over 80 characters
#62: FILE: hw/block/xen_disk.c:607:
+        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) {

ERROR: spaces required around that '=' (ctx:VxV)
#69: FILE: hw/block/xen_disk.c:614:
+                batched=0;
                        ^

total: 1 errors, 1 warnings, 54 lines checked

0001-Improve-xen_disk-batching-behaviour.patch has style problems, please review.
If any of these errors are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.
total: 0 errors, 0 warnings, 111 lines checked

0002-Improve-xen_disk-response-latency.patch has no obvious style problems and
is ready for submission.
WARNING: line over 80 characters
#38: FILE: hw/block/xen_disk.c:139:
+/* We cannot need more pages per ioreq than this, and we do re-use ioreqs,

ERROR: line over 90 characters
#41: FILE: hw/block/xen_disk.c:142:
+ioreq->buf = qemu_memalign(XC_PAGE_SIZE, BLKIF_MAX_SEGMENTS_PER_REQUEST * XC_PAGE_SIZE);

total: 1 errors, 1 warnings, 50 lines checked

0003-Avoid-repeated-memory-allocation-in-xen_disk.patch has style problems, please review.
If any of these errors are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.


Cheers,

-- 
Anthony PERARD
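
For reference, the 2018-11-02 repost earlier in this thread addresses both checkpatch findings for patch 1. Below is a minimal sketch of the corrected shape of the two flagged lines, with hypothetical scaffolding (the stub and the wrapper function) added only so the fragment stands alone:

/* Hypothetical scaffolding so the corrected lines compile on their own. */
#define IO_PLUG_THRESHOLD 1
static void blk_io_unplug_stub(void) { }

void corrected_fragment(int inflight_atstart, int batched)
{
    /* Previously a single line over 80 columns; now wrapped after the '&&'. */
    if (inflight_atstart > IO_PLUG_THRESHOLD &&
        batched >= inflight_atstart) {
        blk_io_unplug_stub();
    }

    /* Previously 'batched=0;'; checkpatch wants spaces around the '='. */
    batched = 0;
    (void)batched;
}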



Re: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour

2018-09-07 Thread Paul Durrant
> -----Original Message-----
> From: Qemu-devel [mailto:qemu-devel-
> bounces+paul.durrant=citrix@nongnu.org] On Behalf Of Tim Smith
> Sent: 07 September 2018 11:21
> To: qemu-devel@nongnu.org
> Subject: [Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour
> 
> When I/O consists of many small requests, performance is improved by
> batching them together in a single io_submit() call. When there are
> relatively few requests, the extra overhead is not worth it. This
> introduces a check to start batching I/O requests via blk_io_plug()/
> blk_io_unplug() in an amount proportional to the number which were
> already in flight at the time we started reading the ring.
> 
> Signed-off-by: Tim Smith 

Reviewed-by: Paul Durrant 

> ---
>  hw/block/xen_disk.c |   29 +++++++++++++++++++++++++++++
>  1 file changed, 29 insertions(+)
> 
> diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
> index 36eff94f84..6cb40d66fa 100644
> --- a/hw/block/xen_disk.c
> +++ b/hw/block/xen_disk.c
> @@ -101,6 +101,9 @@ struct XenBlkDev {
>      AioContext  *ctx;
>  };
> 
> +/* Threshold of in-flight requests above which we will start using
> + * blk_io_plug()/blk_io_unplug() to batch requests */
> +#define IO_PLUG_THRESHOLD 1
>  /* ------------------------------------------------------------- */
> 
>  static void ioreq_reset(struct ioreq *ioreq)
> @@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>  {
>      RING_IDX rc, rp;
>      struct ioreq *ioreq;
> +    int inflight_atstart = blkdev->requests_inflight;
> +    int batched = 0;
> 
>      blkdev->more_work = 0;
> 
> @@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>      xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
> 
>      blk_send_response_all(blkdev);
> +    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
> +     * when we got here, this is an indication that the bottleneck
> +     * is below us, so it's worth beginning to batch up I/O requests
> +     * rather than submitting them immediately. The maximum number
> +     * of requests we're willing to batch is the number already in
> +     * flight, so it can grow up to max_requests when the bottleneck
> +     * is below us */
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_plug(blkdev->blk);
> +    }
>      while (rc != rp) {
>          /* pull request from ring */
>          if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
> @@ -589,7 +604,21 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
>              continue;
>          }
> 
> +        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) {
> +            blk_io_unplug(blkdev->blk);
> +        }
>          ioreq_runio_qemu_aio(ioreq);
> +        if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +            if (batched >= inflight_atstart) {
> +                blk_io_plug(blkdev->blk);
> +                batched=0;
> +            } else {
> +                batched++;
> +            }
> +        }
> +    }
> +    if (inflight_atstart > IO_PLUG_THRESHOLD) {
> +        blk_io_unplug(blkdev->blk);
>      }
> 
>      if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {
> 



[Qemu-devel] [PATCH 1/3] Improve xen_disk batching behaviour

2018-09-07 Thread Tim Smith
When I/O consists of many small requests, performance is improved by
batching them together in a single io_submit() call. When there are
relatively few requests, the extra overhead is not worth it. This
introduces a check to start batching I/O requests via blk_io_plug()/
blk_io_unplug() in an amount proportional to the number which were
already in flight at the time we started reading the ring.

Signed-off-by: Tim Smith 
---
 hw/block/xen_disk.c |   29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/hw/block/xen_disk.c b/hw/block/xen_disk.c
index 36eff94f84..6cb40d66fa 100644
--- a/hw/block/xen_disk.c
+++ b/hw/block/xen_disk.c
@@ -101,6 +101,9 @@ struct XenBlkDev {
     AioContext  *ctx;
 };
 
+/* Threshold of in-flight requests above which we will start using
+ * blk_io_plug()/blk_io_unplug() to batch requests */
+#define IO_PLUG_THRESHOLD 1
 /* ------------------------------------------------------------- */
 
 static void ioreq_reset(struct ioreq *ioreq)
@@ -542,6 +545,8 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
 {
     RING_IDX rc, rp;
     struct ioreq *ioreq;
+    int inflight_atstart = blkdev->requests_inflight;
+    int batched = 0;
 
     blkdev->more_work = 0;
 
@@ -550,6 +555,16 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
     xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
 
     blk_send_response_all(blkdev);
+    /* If there were more than IO_PLUG_THRESHOLD ioreqs in flight
+     * when we got here, this is an indication that the bottleneck
+     * is below us, so it's worth beginning to batch up I/O requests
+     * rather than submitting them immediately. The maximum number
+     * of requests we're willing to batch is the number already in
+     * flight, so it can grow up to max_requests when the bottleneck
+     * is below us */
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_plug(blkdev->blk);
+    }
     while (rc != rp) {
         /* pull request from ring */
         if (RING_REQUEST_CONS_OVERFLOW(&blkdev->rings.common, rc)) {
@@ -589,7 +604,21 @@ static void blk_handle_requests(struct XenBlkDev *blkdev)
             continue;
         }
 
+        if (inflight_atstart > IO_PLUG_THRESHOLD && batched >= inflight_atstart) {
+            blk_io_unplug(blkdev->blk);
+        }
         ioreq_runio_qemu_aio(ioreq);
+        if (inflight_atstart > IO_PLUG_THRESHOLD) {
+            if (batched >= inflight_atstart) {
+                blk_io_plug(blkdev->blk);
+                batched=0;
+            } else {
+                batched++;
+            }
+        }
+    }
+    if (inflight_atstart > IO_PLUG_THRESHOLD) {
+        blk_io_unplug(blkdev->blk);
     }
 
     if (blkdev->more_work && blkdev->requests_inflight < blkdev->max_requests) {