Re: [PATCH v2 07/20] block/block-copy: add ratelimit to block-copy

2020-09-25 Thread Vladimir Sementsov-Ogievskiy

22.07.2020 14:05, Max Reitz wrote:

On 01.06.20 20:11, Vladimir Sementsov-Ogievskiy wrote:

We are going to directly use one async block-copy operation for backup
job, so we need rate limitator.


%s/limitator/limiter/g, I think.


We want to maintain current backup behavior: only background copying is
limited and copy-before-write operations only participate in limit
calculation. Therefore we need one rate limitator for block-copy state
and boolean flag for block-copy call state for actual limitation.

Note, that we can't just calculate each chunk in limitator after
successful copying: it will not save us from starting a lot of async
sub-requests which will exceed limit too much. Instead let's use the
following scheme on sub-request creation:
1. If at the moment limit is not exceeded, create the request and
account it immediately.
2. If at the moment the limit is already exceeded, do not create the sub-request
and handle the limit instead (by sleeping).
With this approach we'll never exceed the limit more than by one
sub-request (which pretty much matches current backup behavior).


Sounds reasonable.


Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  include/block/block-copy.h |  8 +++
  block/block-copy.c | 44 ++
  2 files changed, 52 insertions(+)

diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index 600984c733..d40e691123 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -59,6 +59,14 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
   int64_t max_chunk,
   BlockCopyAsyncCallbackFunc cb);
  
+/*

+ * Set speed limit for block-copy instance. All block-copy operations related to
+ * this BlockCopyState will participate in speed calculation, but only
+ * block_copy_async calls with @ratelimit=true will be actually limited.
+ */
+void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
+  uint64_t speed);
+
  BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
  void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
  
diff --git a/block/block-copy.c b/block/block-copy.c

index 4114d1fd25..851d9c8aaf 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -26,6 +26,7 @@
  #define BLOCK_COPY_MAX_BUFFER (1 * MiB)
  #define BLOCK_COPY_MAX_MEM (128 * MiB)
  #define BLOCK_COPY_MAX_WORKERS 64
+#define BLOCK_COPY_SLICE_TIME 1ULL /* ns */
  
  static coroutine_fn int block_copy_task_entry(AioTask *task);
  
@@ -36,11 +37,13 @@ typedef struct BlockCopyCallState {

  int64_t bytes;
  int max_workers;
  int64_t max_chunk;
+bool ratelimit;
  BlockCopyAsyncCallbackFunc cb;
  
  /* State */

  bool failed;
  bool finished;
+QemuCoSleepState *sleep_state;
  
  /* OUT parameters */

  bool error_is_read;
@@ -103,6 +106,9 @@ typedef struct BlockCopyState {
  void *progress_opaque;
  
  SharedResource *mem;

+
+uint64_t speed;
+RateLimit rate_limit;
  } BlockCopyState;
  
  static BlockCopyTask *find_conflicting_task(BlockCopyState *s,

@@ -611,6 +617,21 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
  }
  task->zeroes = ret & BDRV_BLOCK_ZERO;
  
+if (s->speed) {

+if (call_state->ratelimit) {
+uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
+if (ns > 0) {
+block_copy_task_end(task, -EAGAIN);
+g_free(task);
+qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns,
+  &call_state->sleep_state);
+continue;
+}
+}
+
+ratelimit_calculate_delay(&s->rate_limit, task->bytes);
+}
+


Looks good.


  trace_block_copy_process(s, task->offset);
  
  co_get_from_shres(s->mem, task->bytes);

@@ -649,6 +670,13 @@ out:
  return ret < 0 ? ret : found_dirty;
  }
  
+static void block_copy_kick(BlockCopyCallState *call_state)

+{
+if (call_state->sleep_state) {
+qemu_co_sleep_wake(call_state->sleep_state);
+}
+}
+
  /*
   * block_copy_common
   *
@@ -729,6 +757,7 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
  .s = s,
  .offset = offset,
  .bytes = bytes,
+.ratelimit = ratelimit,


Hm, same problem/question as in patch 6: Should the @ratelimit parameter
really be added in patch 5 if it’s used only now?


  .cb = cb,
  .max_workers = max_workers ?: BLOCK_COPY_MAX_WORKERS,
  .max_chunk = max_chunk,
@@ -752,3 +781,18 @@ void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
  {
  s->skip_unallocated = skip;
  }
+
+void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
+  uint64_t speed)
+{
+uint64_t old_speed = s->speed;
+
+

Re: [PATCH v2 07/20] block/block-copy: add ratelimit to block-copy

2020-07-22 Thread Max Reitz
On 01.06.20 20:11, Vladimir Sementsov-Ogievskiy wrote:
> We are going to directly use one async block-copy operation for backup
> job, so we need rate limitator.

%s/limitator/limiter/g, I think.

> We want to maintain current backup behavior: only background copying is
> limited and copy-before-write operations only participate in limit
> calculation. Therefore we need one rate limitator for block-copy state
> and boolean flag for block-copy call state for actual limitation.
> 
> Note, that we can't just calculate each chunk in limitator after
> successful copying: it will not save us from starting a lot of async
> sub-requests which will exceed limit too much. Instead let's use the
> following scheme on sub-request creation:
> 1. If at the moment limit is not exceeded, create the request and
> account it immediately.
> 2. If at the moment the limit is already exceeded, do not create the sub-request
> and handle the limit instead (by sleeping).
> With this approach we'll never exceed the limit more than by one
> sub-request (which pretty much matches current backup behavior).

Sounds reasonable.

> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> ---
>  include/block/block-copy.h |  8 +++
>  block/block-copy.c | 44 ++
>  2 files changed, 52 insertions(+)
> 
> diff --git a/include/block/block-copy.h b/include/block/block-copy.h
> index 600984c733..d40e691123 100644
> --- a/include/block/block-copy.h
> +++ b/include/block/block-copy.h
> @@ -59,6 +59,14 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
>   int64_t max_chunk,
>   BlockCopyAsyncCallbackFunc cb);
>  
> +/*
> + * Set speed limit for block-copy instance. All block-copy operations related to
> + * this BlockCopyState will participate in speed calculation, but only
> + * block_copy_async calls with @ratelimit=true will be actually limited.
> + */
> +void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
> +  uint64_t speed);
> +
>  BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
>  void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
>  
> diff --git a/block/block-copy.c b/block/block-copy.c
> index 4114d1fd25..851d9c8aaf 100644
> --- a/block/block-copy.c
> +++ b/block/block-copy.c
> @@ -26,6 +26,7 @@
>  #define BLOCK_COPY_MAX_BUFFER (1 * MiB)
>  #define BLOCK_COPY_MAX_MEM (128 * MiB)
>  #define BLOCK_COPY_MAX_WORKERS 64
> +#define BLOCK_COPY_SLICE_TIME 1ULL /* ns */
>  
>  static coroutine_fn int block_copy_task_entry(AioTask *task);
>  
> @@ -36,11 +37,13 @@ typedef struct BlockCopyCallState {
>  int64_t bytes;
>  int max_workers;
>  int64_t max_chunk;
> +bool ratelimit;
>  BlockCopyAsyncCallbackFunc cb;
>  
>  /* State */
>  bool failed;
>  bool finished;
> +QemuCoSleepState *sleep_state;
>  
>  /* OUT parameters */
>  bool error_is_read;
> @@ -103,6 +106,9 @@ typedef struct BlockCopyState {
>  void *progress_opaque;
>  
>  SharedResource *mem;
> +
> +uint64_t speed;
> +RateLimit rate_limit;
>  } BlockCopyState;
>  
>  static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
> @@ -611,6 +617,21 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
>  }
>  task->zeroes = ret & BDRV_BLOCK_ZERO;
>  
> +if (s->speed) {
> +if (call_state->ratelimit) {
> +uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
> +if (ns > 0) {
> +block_copy_task_end(task, -EAGAIN);
> +g_free(task);
> +qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns,
> +  &call_state->sleep_state);
> +continue;
> +}
> +}
> +
> +ratelimit_calculate_delay(&s->rate_limit, task->bytes);
> +}
> +

Looks good.

>  trace_block_copy_process(s, task->offset);
>  
>  co_get_from_shres(s->mem, task->bytes);
> @@ -649,6 +670,13 @@ out:
>  return ret < 0 ? ret : found_dirty;
>  }
>  
> +static void block_copy_kick(BlockCopyCallState *call_state)
> +{
> +if (call_state->sleep_state) {
> +qemu_co_sleep_wake(call_state->sleep_state);
> +}
> +}
> +
>  /*
>   * block_copy_common
>   *
> @@ -729,6 +757,7 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
>  .s = s,
>  .offset = offset,
>  .bytes = bytes,
> +.ratelimit = ratelimit,

Hm, same problem/question as in patch 6: Should the @ratelimit parameter
really be added in patch 5 if it’s used only now?

>  .cb = cb,
>  .max_workers = max_workers ?: BLOCK_COPY_MAX_WORKERS,
>  .max_chunk = max_chunk,
> @@ -752,3 +781,18 @@ void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
>  {
>  s->skip_unallocated = skip;
>  }
> +
> +void block_

[PATCH v2 07/20] block/block-copy: add ratelimit to block-copy

2020-06-01 Thread Vladimir Sementsov-Ogievskiy
We are going to directly use one async block-copy operation for backup
job, so we need rate limitator.

We want to maintain current backup behavior: only background copying is
limited and copy-before-write operations only participate in limit
calculation. Therefore we need one rate limitator for block-copy state
and boolean flag for block-copy call state for actual limitation.

Note, that we can't just calculate each chunk in limitator after
successful copying: it will not save us from starting a lot of async
sub-requests which will exceed limit too much. Instead let's use the
following scheme on sub-request creation:
1. If at the moment limit is not exceeded, create the request and
account it immediately.
2. If at the moment the limit is already exceeded, do not create the sub-request
and handle the limit instead (by sleeping).
With this approach we'll never exceed the limit more than by one
sub-request (which pretty much matches current backup behavior).

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/block-copy.h |  8 +++
 block/block-copy.c | 44 ++
 2 files changed, 52 insertions(+)

diff --git a/include/block/block-copy.h b/include/block/block-copy.h
index 600984c733..d40e691123 100644
--- a/include/block/block-copy.h
+++ b/include/block/block-copy.h
@@ -59,6 +59,14 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
  int64_t max_chunk,
  BlockCopyAsyncCallbackFunc cb);
 
+/*
+ * Set speed limit for block-copy instance. All block-copy operations related to
+ * this BlockCopyState will participate in speed calculation, but only
+ * block_copy_async calls with @ratelimit=true will be actually limited.
+ */
+void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
+  uint64_t speed);
+
 BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
 void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
 
diff --git a/block/block-copy.c b/block/block-copy.c
index 4114d1fd25..851d9c8aaf 100644
--- a/block/block-copy.c
+++ b/block/block-copy.c
@@ -26,6 +26,7 @@
 #define BLOCK_COPY_MAX_BUFFER (1 * MiB)
 #define BLOCK_COPY_MAX_MEM (128 * MiB)
 #define BLOCK_COPY_MAX_WORKERS 64
+#define BLOCK_COPY_SLICE_TIME 1ULL /* ns */
 
 static coroutine_fn int block_copy_task_entry(AioTask *task);
 
@@ -36,11 +37,13 @@ typedef struct BlockCopyCallState {
 int64_t bytes;
 int max_workers;
 int64_t max_chunk;
+bool ratelimit;
 BlockCopyAsyncCallbackFunc cb;
 
 /* State */
 bool failed;
 bool finished;
+QemuCoSleepState *sleep_state;
 
 /* OUT parameters */
 bool error_is_read;
@@ -103,6 +106,9 @@ typedef struct BlockCopyState {
 void *progress_opaque;
 
 SharedResource *mem;
+
+uint64_t speed;
+RateLimit rate_limit;
 } BlockCopyState;
 
 static BlockCopyTask *find_conflicting_task(BlockCopyState *s,
@@ -611,6 +617,21 @@ block_copy_dirty_clusters(BlockCopyCallState *call_state)
 }
 task->zeroes = ret & BDRV_BLOCK_ZERO;
 
+if (s->speed) {
+if (call_state->ratelimit) {
+uint64_t ns = ratelimit_calculate_delay(&s->rate_limit, 0);
+if (ns > 0) {
+block_copy_task_end(task, -EAGAIN);
+g_free(task);
+qemu_co_sleep_ns_wakeable(QEMU_CLOCK_REALTIME, ns,
+  &call_state->sleep_state);
+continue;
+}
+}
+
+ratelimit_calculate_delay(&s->rate_limit, task->bytes);
+}
+
 trace_block_copy_process(s, task->offset);
 
 co_get_from_shres(s->mem, task->bytes);
@@ -649,6 +670,13 @@ out:
 return ret < 0 ? ret : found_dirty;
 }
 
+static void block_copy_kick(BlockCopyCallState *call_state)
+{
+if (call_state->sleep_state) {
+qemu_co_sleep_wake(call_state->sleep_state);
+}
+}
+
 /*
  * block_copy_common
  *
@@ -729,6 +757,7 @@ BlockCopyCallState *block_copy_async(BlockCopyState *s,
 .s = s,
 .offset = offset,
 .bytes = bytes,
+.ratelimit = ratelimit,
 .cb = cb,
 .max_workers = max_workers ?: BLOCK_COPY_MAX_WORKERS,
 .max_chunk = max_chunk,
@@ -752,3 +781,18 @@ void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
 {
 s->skip_unallocated = skip;
 }
+
+void block_copy_set_speed(BlockCopyState *s, BlockCopyCallState *call_state,
+  uint64_t speed)
+{
+uint64_t old_speed = s->speed;
+
+s->speed = speed;
+if (speed > 0) {
+ratelimit_set_speed(&s->rate_limit, speed, BLOCK_COPY_SLICE_TIME);
+}
+
+if (call_state && old_speed && (speed > old_speed || speed == 0)) {
+block_copy_kick(call_state);
+}
+}
-- 
2.21.0