Re: [Qemu-devel] [PATCH v4 3/3] The support for queue timer and throttling algorithm

2011-08-04 Thread Zhi Yong Wu
On Tue, Aug 2, 2011 at 4:39 AM, Ryan Harper ry...@us.ibm.com wrote:
 * Zhi Yong Wu wu...@linux.vnet.ibm.com [2011-08-01 01:32]:
 Note:
       1.) When bps/iops limits are specified to a small value such as 511
  bytes/s, this VM will hang up. We are considering how to handle this scenario.
       2.) When the dd command is issued in the guest, if its option bs is set to a
  large value such as bs=1024K, the resulting speed will be slightly bigger than
  the limits.

 For these problems, if you have any nice thoughts, please let us know. :)

 Signed-off-by: Zhi Yong Wu wu...@linux.vnet.ibm.com
 ---
  block.c     |  302 +--
  block.h     |    1 -
  block_int.h |   29 ++
  3 files changed, 323 insertions(+), 9 deletions(-)

 diff --git a/block.c b/block.c
 index 24a25d5..42763a3 100644
 --- a/block.c
 +++ b/block.c
 @@ -29,6 +29,9 @@
  #include "module.h"
  #include "qemu-objects.h"

 +#include "qemu-timer.h"
 +#include "block/blk-queue.h"
 +
  #ifdef CONFIG_BSD
  #include <sys/types.h>
  #include <sys/stat.h>
 @@ -58,6 +61,13 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t 
 sector_num,
  static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
                           const uint8_t *buf, int nb_sectors);

 +static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
 +        bool is_write, double elapsed_time, uint64_t *wait);
 +static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
 +        double elapsed_time, uint64_t *wait);
 +static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
 +        bool is_write, uint64_t *wait);
 +
  static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
      QTAILQ_HEAD_INITIALIZER(bdrv_states);

 @@ -90,6 +100,20 @@ int is_windows_drive(const char *filename)
  }
  #endif

 +static int bdrv_io_limits_enable(BlockIOLimit *io_limits)
 +{
 +    if ((io_limits->bps[0] == 0)
 +          && (io_limits->bps[1] == 0)
 +          && (io_limits->bps[2] == 0)
 +          && (io_limits->iops[0] == 0)
 +          && (io_limits->iops[1] == 0)
 +          && (io_limits->iops[2] == 0)) {
 +        return 0;


 I'd add a field to BlockIOLimit structure, and just do:

  static int bdrv_io_limits_enabled(BlockIOLimit *io_limits)
  {
        return io_limits->enabled;
  }

 Update bdrv_set_io_limits() to do the original check you have, and if
 one of the fields is set, update the enabled flag.

 We incur that logic on *every* request, so let's make it as cheap as
 possible.
Good point; it has been adopted in the QMP/HMP patch.
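
To make the suggestion concrete, a minimal stand-alone sketch, assuming
BlockIOLimit gains an enabled field (the array sizes follow the six checks
above; io_limits_update_enabled() is a hypothetical helper that
bdrv_set_io_limits() would call when the limits change):

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct BlockIOLimit {
        uint64_t bps[3];      /* total/read/write, per the checks above */
        uint64_t iops[3];
        bool     enabled;     /* assumed new field: any limit non-zero */
    } BlockIOLimit;

    /* O(1) per-request check instead of six comparisons. */
    static int bdrv_io_limits_enabled(BlockIOLimit *io_limits)
    {
        return io_limits->enabled;
    }

    /* Recompute the cached flag only when the limits change. */
    static void io_limits_update_enabled(BlockIOLimit *io_limits)
    {
        int i;

        io_limits->enabled = false;
        for (i = 0; i < 3; i++) {
            if (io_limits->bps[i] || io_limits->iops[i]) {
                io_limits->enabled = true;
                break;
            }
        }
    }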

 +    }
 +
 +    return 1;
 +}
 +
  /* check if the path starts with protocol: */
  static int path_has_protocol(const char *path)
  {
 @@ -167,6 +191,28 @@ void path_combine(char *dest, int dest_size,
      }
  }

 +static void bdrv_block_timer(void *opaque)
 +{
 +    BlockDriverState *bs = opaque;
 +    BlockQueue *queue = bs->block_queue;
 +
 +    while (!QTAILQ_EMPTY(&queue->requests)) {
 +        BlockIORequest *request = NULL;
 +        int ret = 0;
 +
 +        request = QTAILQ_FIRST(&queue->requests);
 +        QTAILQ_REMOVE(&queue->requests, request, entry);
 +
 +        ret = qemu_block_queue_handler(request);
 +        if (ret == 0) {
 +            QTAILQ_INSERT_HEAD(&queue->requests, request, entry);
 +            break;

 btw, I did some tracing and I never saw a request get re-added here.
 Now, ideally, I think you were trying to do the following:

 The request is coming from the queue; if we exceed our limits, then we'd
 get back NULL from the handler and we'd requeue the request at the
Right.
 head... but that doesn't actually happen.
It could take place. For example, when block request req1 comes and
exceeds the limits, the block timer is set to fire in 10ms; when block
req2 comes and also exceeds the limits, the timer is updated to fire in
3ms. Assume that no further block requests come. When the firing time
arrives, req2 will be serviced; but when the next enqueued request,
req1, is taken from the block queue, it could still exceed the limits
and would need to be re-enqueued, right?
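
To make the drain-and-requeue behaviour concrete, here is a small
stand-alone model of the bdrv_block_timer() loop; it uses the
<sys/queue.h> TAILQ macros rather than QEMU's QTAILQ, and handle() is a
stand-in for qemu_block_queue_handler():

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/queue.h>

    struct request {
        int id;
        TAILQ_ENTRY(request) entry;
    };

    TAILQ_HEAD(req_queue, request);

    /* Stand-in for qemu_block_queue_handler(): pretend the current slice
     * only has budget left for requests with id < 2. */
    static int handle(struct request *req)
    {
        return req->id < 2;             /* 1 = serviced, 0 = still throttled */
    }

    /* Mirrors the loop above: drain from the head; if a request still
     * exceeds the limits, put it back at the head and stop until the
     * timer fires again. */
    static void drain(struct req_queue *q)
    {
        while (!TAILQ_EMPTY(q)) {
            struct request *req = TAILQ_FIRST(q);
            TAILQ_REMOVE(q, req, entry);

            if (!handle(req)) {
                TAILQ_INSERT_HEAD(q, req, entry);
                break;
            }
            printf("serviced request %d\n", req->id);
            free(req);
        }
    }

    int main(void)
    {
        struct req_queue q = TAILQ_HEAD_INITIALIZER(q);
        int i;

        for (i = 0; i < 3; i++) {
            struct request *req = malloc(sizeof(*req));
            req->id = i;
            TAILQ_INSERT_TAIL(&q, req, entry);
        }
        drain(&q);                      /* services 0 and 1; re-queues 2 */
        return 0;
    }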

 Rather, if we exceed our limits, we invoke qemu_block_queue_enqueue()
 again, which allocates a whole new request and puts it at the tail.
 I think we want to update the throttling logic in readv/writev to return
 NULL if bs->req_from_queue == true and we exceed the limits.  Then this
No, NULL indicates that the block emulation layer failed to start the
readv/writev request.
 logic will do the right thing by inserting the request back to the head.
We've used a request pool; maybe we can release the request back to
this pool when it needs to be freed.
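
A stand-alone sketch of the guard Ryan is proposing, illustrative only:
the stub types and functions below are assumptions, and as noted above
the real change would have to distinguish this NULL from the NULL that
signals a failed request:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef struct BlockDriverState {
        bool req_from_queue;            /* field from the patch above */
    } BlockDriverState;

    typedef struct BlockDriverAIOCB BlockDriverAIOCB;

    /* Stubs standing in for the patch's throttling check, the queue
     * path, and the normal submission path. */
    static bool exceed_io_limits(BlockDriverState *bs, int nb_sectors,
                                 bool is_write, uint64_t *wait)
    {
        (void)bs; (void)nb_sectors; (void)is_write;
        *wait = 0;
        return false;                   /* stub: pretend we are in limits */
    }
    static BlockDriverAIOCB *enqueue(BlockDriverState *bs) { (void)bs; return NULL; }
    static BlockDriverAIOCB *submit(BlockDriverState *bs)  { (void)bs; return NULL; }

    static BlockDriverAIOCB *aio_rw_throttled(BlockDriverState *bs,
                                              int nb_sectors, bool is_write)
    {
        uint64_t wait = 0;

        if (exceed_io_limits(bs, nb_sectors, is_write, &wait)) {
            if (bs->req_from_queue) {
                /* A request replayed from the queue still exceeds the
                 * limits: return NULL so the timer loop re-inserts the
                 * original request at the head instead of allocating a
                 * duplicate at the tail. */
                return NULL;
            }
            return enqueue(bs);         /* fresh request: queue for later */
        }
        return submit(bs);              /* within limits: issue now */
    }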


 +        }
 +
 +        qemu_free(request);


 See my email on blk-queue.c for how we can eliminate freeing the
 request here.
After I go to the office next week, I will check it.

 +    }
 +}
 +
  void bdrv_register(BlockDriver *bdrv)
  {
       if (!bdrv->bdrv_aio_readv) {
 @@ -642,6 +688,19 @@ int bdrv_open(BlockDriverState *bs, const char 
 *filename, int flags,
              bs->change_cb(bs->change_opaque, 

[Qemu-devel] [PATCH v4 3/3] The support for queue timer and throttling algorithm

2011-08-01 Thread Zhi Yong Wu
Note:
  1.) When bps/iops limits are specified to a small value such as 511
bytes/s, this VM will hang up. We are considering how to handle this scenario.
  2.) When the dd command is issued in the guest, if its option bs is set to a
large value such as bs=1024K, the resulting speed will be slightly bigger than the
limits.

For these problems, if you have any nice thoughts, please let us know. :)

Signed-off-by: Zhi Yong Wu wu...@linux.vnet.ibm.com
---
 block.c |  302 +--
 block.h |1 -
 block_int.h |   29 ++
 3 files changed, 323 insertions(+), 9 deletions(-)

diff --git a/block.c b/block.c
index 24a25d5..42763a3 100644
--- a/block.c
+++ b/block.c
@@ -29,6 +29,9 @@
 #include "module.h"
 #include "qemu-objects.h"
 
+#include "qemu-timer.h"
+#include "block/blk-queue.h"
+
 #ifdef CONFIG_BSD
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -58,6 +61,13 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t 
sector_num,
 static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
  const uint8_t *buf, int nb_sectors);
 
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+        bool is_write, double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+        double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+        bool is_write, uint64_t *wait);
+
 static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
 QTAILQ_HEAD_INITIALIZER(bdrv_states);
 
@@ -90,6 +100,20 @@ int is_windows_drive(const char *filename)
 }
 #endif
 
+static int bdrv_io_limits_enable(BlockIOLimit *io_limits)
+{
+    if ((io_limits->bps[0] == 0)
+          && (io_limits->bps[1] == 0)
+          && (io_limits->bps[2] == 0)
+          && (io_limits->iops[0] == 0)
+          && (io_limits->iops[1] == 0)
+          && (io_limits->iops[2] == 0)) {
+        return 0;
+    }
+
+    return 1;
+}
+
 /* check if the path starts with protocol: */
 static int path_has_protocol(const char *path)
 {
@@ -167,6 +191,28 @@ void path_combine(char *dest, int dest_size,
 }
 }
 
+static void bdrv_block_timer(void *opaque)
+{
+    BlockDriverState *bs = opaque;
+    BlockQueue *queue = bs->block_queue;
+
+    while (!QTAILQ_EMPTY(&queue->requests)) {
+        BlockIORequest *request = NULL;
+        int ret = 0;
+
+        request = QTAILQ_FIRST(&queue->requests);
+        QTAILQ_REMOVE(&queue->requests, request, entry);
+
+        ret = qemu_block_queue_handler(request);
+        if (ret == 0) {
+            QTAILQ_INSERT_HEAD(&queue->requests, request, entry);
+            break;
+        }
+
+        qemu_free(request);
+    }
+}
+
 void bdrv_register(BlockDriver *bdrv)
 {
     if (!bdrv->bdrv_aio_readv) {
@@ -642,6 +688,19 @@ int bdrv_open(BlockDriverState *bs, const char *filename, 
int flags,
             bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
 }
 
+    /* throttling disk I/O limits */
+    if (bdrv_io_limits_enable(&bs->io_limits)) {
+        bs->req_from_queue = false;
+        bs->block_queue    = qemu_new_block_queue();
+        bs->block_timer    = qemu_new_timer_ns(vm_clock, bdrv_block_timer, bs);
+
+        bs->slice_start[0] = qemu_get_clock_ns(vm_clock);
+        bs->slice_start[1] = qemu_get_clock_ns(vm_clock);
+
+        bs->slice_end[0]   = qemu_get_clock_ns(vm_clock) + BLOCK_IO_SLICE_TIME;
+        bs->slice_end[1]   = qemu_get_clock_ns(vm_clock) + BLOCK_IO_SLICE_TIME;
+    }
+
 return 0;
 
 unlink_and_fail:
@@ -680,6 +739,16 @@ void bdrv_close(BlockDriverState *bs)
     if (bs->change_cb)
         bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
 }
+
+    /* throttling disk I/O limits */
+    if (bs->block_queue) {
+        qemu_del_block_queue(bs->block_queue);
+    }
+
+    if (bs->block_timer) {
+        qemu_del_timer(bs->block_timer);
+        qemu_free_timer(bs->block_timer);
+    }
 }
 
 void bdrv_close_all(void)
@@ -1312,6 +1381,14 @@ void bdrv_get_geometry_hint(BlockDriverState *bs,
     *psecs = bs->secs;
 }
 
+/* throttling disk io limits */
+void bdrv_set_io_limits(BlockDriverState *bs,
+                        BlockIOLimit *io_limits)
+{
+    memset(&bs->io_limits, 0, sizeof(BlockIOLimit));
+    bs->io_limits = *io_limits;
+}
+
 /* Recognize floppy formats */
 typedef struct FDFormat {
 FDriveType drive;
@@ -2111,6 +2188,165 @@ char *bdrv_snapshot_dump(char *buf, int buf_size, 
QEMUSnapshotInfo *sn)
 return buf;
 }
 
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+                 bool is_write, double elapsed_time, uint64_t *wait) {
+    uint64_t bps_limit = 0;
+    double   bytes_limit, bytes_disp, bytes_res;
+    double   slice_time, wait_time;
+
+    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
+        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
+    } else if (bs->io_limits.bps[is_write]) {
+        bps_limit = bs->io_limits.bps[is_write];
+    } else {
+        if (wait) {
+ 
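
The function body is truncated above. For illustration, a stand-alone
sketch of how a bps check of this shape typically completes; the 100 ms
slice and the 512-byte sector size are assumptions, not the author's
exact code:

    #include <stdbool.h>
    #include <stdint.h>

    #define SLICE_TIME_NS 100000000.0   /* assumed 100 ms slice */

    static bool exceed_bps(uint64_t bps_limit, double bytes_disp,
                           int nb_sectors, double elapsed_time,
                           uint64_t *wait)
    {
        /* Budget for one slice, in bytes. */
        double bytes_limit = bps_limit * SLICE_TIME_NS / 1e9;
        /* Bytes this request would add (assumed 512-byte sectors). */
        double bytes_res = (double)nb_sectors * 512;

        if (bytes_disp + bytes_res <= bytes_limit) {
            if (wait) {
                *wait = 0;
            }
            return false;               /* fits in the current slice */
        }

        /* Wait until enough wall-clock time has passed that the bytes
         * already dispatched plus this request fit under bps_limit. */
        double wait_time = (bytes_disp + bytes_res) / bps_limit - elapsed_time;
        if (wait_time < 0) {
            wait_time = 0;
        }
        if (wait) {
            *wait = (uint64_t)(wait_time * 1e9);   /* nanoseconds */
        }
        return true;
    }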

Re: [Qemu-devel] [PATCH v4 3/3] The support for queue timer and throttling algorithm

2011-08-01 Thread Ryan Harper
* Zhi Yong Wu wu...@linux.vnet.ibm.com [2011-08-01 01:32]:
 Note:
   1.) When bps/iops limits are specified to a small value such as 511
 bytes/s, this VM will hang up. We are considering how to handle this scenario.
   2.) When the dd command is issued in the guest, if its option bs is set to a
 large value such as bs=1024K, the resulting speed will be slightly bigger than
 the limits.
 
 For these problems, if you have any nice thoughts, please let us know. :)
 
 Signed-off-by: Zhi Yong Wu wu...@linux.vnet.ibm.com
 ---
  block.c |  302 +--
  block.h |1 -
  block_int.h |   29 ++
  3 files changed, 323 insertions(+), 9 deletions(-)
 
 diff --git a/block.c b/block.c
 index 24a25d5..42763a3 100644
 --- a/block.c
 +++ b/block.c
 @@ -29,6 +29,9 @@
  #include "module.h"
  #include "qemu-objects.h"
 
 +#include "qemu-timer.h"
 +#include "block/blk-queue.h"
 +
  #ifdef CONFIG_BSD
  #include <sys/types.h>
  #include <sys/stat.h>
 @@ -58,6 +61,13 @@ static int bdrv_read_em(BlockDriverState *bs, int64_t 
 sector_num,
  static int bdrv_write_em(BlockDriverState *bs, int64_t sector_num,
   const uint8_t *buf, int nb_sectors);
 
 +static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
 +        bool is_write, double elapsed_time, uint64_t *wait);
 +static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
 +        double elapsed_time, uint64_t *wait);
 +static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
 +        bool is_write, uint64_t *wait);
 +
  static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
  QTAILQ_HEAD_INITIALIZER(bdrv_states);
 
 @@ -90,6 +100,20 @@ int is_windows_drive(const char *filename)
  }
  #endif
 
 +static int bdrv_io_limits_enable(BlockIOLimit *io_limits)
 +{
 +    if ((io_limits->bps[0] == 0)
 +          && (io_limits->bps[1] == 0)
 +          && (io_limits->bps[2] == 0)
 +          && (io_limits->iops[0] == 0)
 +          && (io_limits->iops[1] == 0)
 +          && (io_limits->iops[2] == 0)) {
 +        return 0;


I'd add a field to BlockIOLimit structure, and just do:

 static int bdrv_io_limits_enabled(BlockIOLimit *io_limits)
 {
   return io_limits->enabled;
 }

Update bdrv_set_io_limits() to do the original check you have, and if
one of the fields is set, update the enabled flag.

We incur that logic on *every* request, so let's make it as cheap as
possible.

 +    }
 +
 +    return 1;
 +}
 +
  /* check if the path starts with protocol: */
  static int path_has_protocol(const char *path)
  {
 @@ -167,6 +191,28 @@ void path_combine(char *dest, int dest_size,
  }
  }
 
 +static void bdrv_block_timer(void *opaque)
 +{
 +    BlockDriverState *bs = opaque;
 +    BlockQueue *queue = bs->block_queue;
 +
 +    while (!QTAILQ_EMPTY(&queue->requests)) {
 +        BlockIORequest *request = NULL;
 +        int ret = 0;
 +
 +        request = QTAILQ_FIRST(&queue->requests);
 +        QTAILQ_REMOVE(&queue->requests, request, entry);
 +
 +        ret = qemu_block_queue_handler(request);
 +        if (ret == 0) {
 +            QTAILQ_INSERT_HEAD(&queue->requests, request, entry);
 +            break;

btw, I did some tracing and I never saw a request get re-added here.
Now, ideally, I think you were trying to do the following:

The request is coming from the queue; if we exceed our limits, then we'd
get back NULL from the handler and we'd requeue the request at the
head... but that doesn't actually happen.

Rather, if we exceed our limits, we invoke qemu_block_queue_enqueue()
again, which allocates a whole new request and puts it at the tail.
I think we want to update the throttling logic in readv/writev to return
NULL if bs->req_from_queue == true and we exceed the limits.  Then this
logic will do the right thing by inserting the request back to the head.


 +        }
 +
 +        qemu_free(request);


See my email on blk-queue.c for how we can eliminate freeing the
request here.

 +    }
 +}
 +
  void bdrv_register(BlockDriver *bdrv)
  {
     if (!bdrv->bdrv_aio_readv) {
 @@ -642,6 +688,19 @@ int bdrv_open(BlockDriverState *bs, const char 
 *filename, int flags,
             bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
  }
 
 +    /* throttling disk I/O limits */
 +    if (bdrv_io_limits_enable(&bs->io_limits)) {
 +        bs->req_from_queue = false;
 +        bs->block_queue    = qemu_new_block_queue();
 +        bs->block_timer    = qemu_new_timer_ns(vm_clock, bdrv_block_timer,
 +                                               bs);
 +
 +        bs->slice_start[0] = qemu_get_clock_ns(vm_clock);
 +        bs->slice_start[1] = qemu_get_clock_ns(vm_clock);
 +
 +        bs->slice_end[0]   = qemu_get_clock_ns(vm_clock) +
 +                             BLOCK_IO_SLICE_TIME;
 +        bs->slice_end[1]   = qemu_get_clock_ns(vm_clock) +
 +                             BLOCK_IO_SLICE_TIME;
 +    }
 +
  return 0;
 
  unlink_and_fail:
 @@ -680,6 +739,16 @@ void bdrv_close(BlockDriverState *bs)
      if (bs->change_cb)
          bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
  }
 +
 +/* throttling