Re: [Qemu-devel] rate limiting issues

2018-02-06 Thread Stefan Hajnoczi
On Tue, Feb 06, 2018 at 11:54:51AM +0100, Wolfgang Bumiller wrote:
> On Mon, Feb 05, 2018 at 03:31:40PM +0000, Stefan Hajnoczi wrote:
> > On Fri, Feb 02, 2018 at 12:10:22PM +0100, Wolfgang Bumiller wrote:
> (...)
> > > Explanation:
> > > The ratelimiting code in include/qemu/ratelimit.h currently uses slices
> > > with quotas. Finishing up the quota for one slice means it'll wait for
> > > the end of this _and_ the next slice before resetting the accounting and
> > > starting over. If that first slice was exceeded by only a tiny bit, we
> > > effectively spend every second slice waiting around before starting over.
> (...)
> > > typedef struct {
> > >     int64_t last_time;
> > >     uint64_t speed;
> > >     int64_t allowed;
> > > } RateLimit;
> > > 
> > > static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
> > > {
> (... wrong code)
> > > }
> > 
> > This does not implement a rate limit, at least not in the way that users
> > expect:
> > 
> > Imagine there is no activity for 24 hours.  We accumulate a huge number
> > of allowed units and can exceed the rate limit for some time.
> 
> Indeed, I forgot that blockjobs can be paused, which affects both versions.
> So we need slices, but do they need to be aligned? Changing the current
> code to not try to align the waiting period with where a slice would start
> also seems to work better...
> 
> ---8<---
> From e50097e88a04eae0d1d40bed5fb940dc4baa835d Mon Sep 17 00:00:00 2001
> From: Wolfgang Bumiller 
> Date: Tue, 6 Feb 2018 11:34:34 +0100
> Subject: [PATCH] ratelimit: don't align wait time with slices
> 
> It is possible for rate limited writes to keep overshooting a slice's
> quota by a tiny amount, causing the slice-aligned waiting period to
> effectively halve the rate.
> 
> Signed-off-by: Wolfgang Bumiller 
> ---
>  include/qemu/ratelimit.h | 11 +++++------
>  1 file changed, 5 insertions(+), 6 deletions(-)
> 
> diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h
> index 8da1232574..01a5d535ec 100644
> --- a/include/qemu/ratelimit.h
> +++ b/include/qemu/ratelimit.h
> @@ -35,7 +35,7 @@ typedef struct {
>  static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
>  {
>      int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> -    uint64_t delay_slices;
> +    double delay_slices;
>  
>      assert(limit->slice_quota && limit->slice_ns);
>  
> @@ -54,12 +54,11 @@ static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
>          return 0;
>      }
>  
> -    /* Quota exceeded. Calculate the next time slice we may start
> -     * sending data again. */
> -    delay_slices = (limit->dispatched + limit->slice_quota - 1) /
> -        limit->slice_quota;
> +    /* Quota exceeded. Wait based on the excess amount and then start a new
> +     * slice. */
> +    delay_slices = (double)limit->dispatched / limit->slice_quota;
>      limit->slice_end_time = limit->slice_start_time +
> -        delay_slices * limit->slice_ns;
> +        (uint64_t)(delay_slices * limit->slice_ns);
>      return limit->slice_end_time - now;
>  }

Looks good, thanks!  Please send the patch as a top-level email thread
so it can be merged:
https://wiki.qemu.org/Contribute/SubmitAPatch

Stefan




Re: [Qemu-devel] rate limiting issues

2018-02-06 Thread Wolfgang Bumiller
On Mon, Feb 05, 2018 at 03:31:40PM +0000, Stefan Hajnoczi wrote:
> On Fri, Feb 02, 2018 at 12:10:22PM +0100, Wolfgang Bumiller wrote:
(...)
> > Explanation:
> > The ratelimiting code in include/qemu/ratelimit.h currently uses slices with
> > quotas. Finishing up the quota for one slice means it'll wait for the end of
> > this _and_ the next slice before resetting the accounting and starting over.
> > If that first slice was exceeded by only a tiny bit, we effectively spend
> > every second slice waiting around before starting over.
(...)
> > typedef struct {
> >     int64_t last_time;
> >     uint64_t speed;
> >     int64_t allowed;
> > } RateLimit;
> > 
> > static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
> > {
(... wrong code)
> > }
> 
> This does not implement a rate limit, at least not in the way that users
> expect:
> 
> Imagine there is no activity for 24 hours.  We accumulate a huge number
> of allowed units and can exceed the rate limit for some time.

Indeed, I forgot that blockjobs can be paused, which affects both versions.
So we need slices, but do they need to be aligned? Changing the current
code to not try to align the waiting period with where a slice would start
also seems to work better...

---8<---
From e50097e88a04eae0d1d40bed5fb940dc4baa835d Mon Sep 17 00:00:00 2001
From: Wolfgang Bumiller 
Date: Tue, 6 Feb 2018 11:34:34 +0100
Subject: [PATCH] ratelimit: don't align wait time with slices

It is possible for rate limited writes to keep overshooting a slice's
quota by a tiny amount, causing the slice-aligned waiting period to
effectively halve the rate.

Signed-off-by: Wolfgang Bumiller 
---
 include/qemu/ratelimit.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/include/qemu/ratelimit.h b/include/qemu/ratelimit.h
index 8da1232574..01a5d535ec 100644
--- a/include/qemu/ratelimit.h
+++ b/include/qemu/ratelimit.h
@@ -35,7 +35,7 @@ typedef struct {
 static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
 {
     int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    uint64_t delay_slices;
+    double delay_slices;
 
     assert(limit->slice_quota && limit->slice_ns);
 
@@ -54,12 +54,11 @@ static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
         return 0;
     }
 
-    /* Quota exceeded. Calculate the next time slice we may start
-     * sending data again. */
-    delay_slices = (limit->dispatched + limit->slice_quota - 1) /
-        limit->slice_quota;
+    /* Quota exceeded. Wait based on the excess amount and then start a new
+     * slice. */
+    delay_slices = (double)limit->dispatched / limit->slice_quota;
     limit->slice_end_time = limit->slice_start_time +
-        delay_slices * limit->slice_ns;
+        (uint64_t)(delay_slices * limit->slice_ns);
     return limit->slice_end_time - now;
 }
 
-- 
2.11.0
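
(For context, a minimal sketch of the calling pattern this header expects
from a block job. have_work(), sleep_ns(), and write_chunk() are illustrative
stand-ins rather than QEMU APIs, and the 100ms slice is an assumption based
on what block jobs used at the time:

    RateLimit limit;

    /* 30720 KiB/s with an assumed 100ms slice, matching the report above. */
    ratelimit_set_speed(&limit, 30720 * 1024, 100000000ULL);

    while (have_work()) {
        uint64_t chunk = 64 * 1024;  /* backup jobs write 64KiB chunks */
        int64_t delay_ns = ratelimit_calculate_delay(&limit, chunk);
        if (delay_ns > 0) {
            sleep_ns(delay_ns);      /* stand-in for the block job's sleep call */
        }
        write_chunk(chunk);          /* stand-in for the actual I/O */
    }

The limiter accounts each chunk when asked for a delay, so the caller sleeps
for whatever delay is returned and then issues the I/O unconditionally.)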





Re: [Qemu-devel] rate limiting issues

2018-02-05 Thread Stefan Hajnoczi
On Fri, Feb 02, 2018 at 12:10:22PM +0100, Wolfgang Bumiller wrote:
> Summary:
> Rate limit is effectively halved when the written chunks add up to only
> slightly exceeding the quota of a slice. This happens surprisingly reliably.
> 
> Explanation:
> The ratelimiting code in include/qemu/ratelimit.h currently uses slices with
> quotas. Finishing up the quota for one slice means it'll wait for the end of
> this _and_ the next slice before resetting the accounting and starting over.
> If that first slice was exceeded by only a tiny bit, we effectively spend
> every second slice waiting around before starting over.
> 
> Here if I use a limit of 3KiB/s I get 3KiB/s.
> Increasing the limit to 30700KiB/s gives me 30700KiB/s.
> Increasing it to 30720KiB/s reliably gives me 15000KiB/s.
> 
> Making it wait to the end of only the current slice means the excess data is
> not counted at all and we may go above the limit (though by at most one
> write-chunk, so I'm not sure if that's fine for most of the users; for backup
> jobs it seems to be 64k always).
> 
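
(A quick arithmetic sketch of where those numbers plausibly come from,
assuming the 100ms slice block jobs used at the time and the 64KiB backup
chunks mentioned above; the program is illustrative, not from the original
mail:

    #include <inttypes.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t slice_ns = 100000000ULL;         /* assumed 100ms slice */
        uint64_t quota = 30720ULL * 1024 / 10;    /* 30720 KiB/s -> 3072 KiB per slice */
        uint64_t dispatched = quota + 64 * 1024;  /* 48 x 64KiB fit exactly; the 49th overshoots */

        /* The current code rounds the wait up to whole slices, so a 64KiB
         * overshoot stalls until the end of the *next* slice. */
        uint64_t slices = (dispatched + quota - 1) / quota;  /* = 2 */
        uint64_t ms = slices * slice_ns / 1000000;           /* = 200 */
        printf("%" PRIu64 " KiB in %" PRIu64 " ms -> %" PRIu64 " KiB/s\n",
               dispatched / 1024, ms, dispatched * 1000 / 1024 / ms);
        return 0;
    }

This prints "3136 KiB in 200 ms -> 15680 KiB/s", close to the ~15000 KiB/s
observed above: one slice's worth of data plus one chunk, spread over two
slices' worth of time.)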
> I'd like to fix this and am unsure about which way to go. On the one hand I
> think the old code (before f14a39ccb922) may be fixable in a better way by not
> resetting the accounting completely but subtracting the amount of data the
> wait-period would have added.
> 
> At the same time, though, this could be simplified to not using slices but
> always comparing the amount of actually written data to the amount of data
> which should at most have been written.
> 
> Here are two approaches which seem to fix my issues:
> 
> --- Old code revised:
> 
> typedef struct {
>     int64_t next_slice_time;
>     uint64_t slice_quota;
>     uint64_t slice_ns;
>     int64_t dispatched;
> } RateLimit;
> 
> static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
> {
>     int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> 
>     assert(limit->slice_quota && limit->slice_ns);
> 
>     if (limit->next_slice_time == 0) { /* first call */
>         limit->dispatched = 0;
>         limit->next_slice_time = now + limit->slice_ns;
>         return 0;
>     }
> 
>     if (limit->next_slice_time < now) {
>         uint64_t passed_slices = DIV_ROUND_UP(now - limit->next_slice_time,
>                                               limit->slice_ns);
>         limit->next_slice_time = now + limit->slice_ns;
>         limit->dispatched -= passed_slices * limit->slice_quota;
>     }
>     limit->dispatched += n;
>     if (limit->dispatched+n <= limit->slice_quota) {

This looks buggy.  n is added twice?
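(Presumably the intent was "if (limit->dispatched <= limit->slice_quota)",
since n was already added on the previous line; as posted, each chunk is
counted twice in the comparison.)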

>         return 0;
>     }
>     return limit->next_slice_time - now;
> }
> 
> static inline void ratelimit_set_speed(RateLimit *limit, uint64_t speed,
>                                        uint64_t slice_ns)
> {
>     limit->slice_ns = slice_ns;
>     limit->slice_quota = MAX(((double)speed * slice_ns) / 1000000000ULL, 1);
> }
> 
> ---
> 
> And this is a short slice-less version. I wonder if there's any particular
> reason for sticking to slices?
> 
> --- Version without slices:
> 
> typedef struct {
>     int64_t last_time;
>     uint64_t speed;
>     int64_t allowed;
> } RateLimit;
> 
> static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
> {
>     int64_t delta, now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> 
>     if (limit->last_time == 0) { /* first call */
>         limit->allowed = -n;
>         limit->last_time = now;
>         return (n * 1000000000ULL) / limit->speed;
>     }
> 
>     delta = (now - limit->last_time);
>     limit->allowed += (delta * limit->speed) / 1000000000ULL - n;
>     limit->last_time = now;
>     if (limit->allowed < 0) {
>         return ((uint64_t)-limit->allowed * 1000000000ULL) / limit->speed;
>     }
>     return 0;
> }

This does not implement a rate limit, at least not in the way that users
expect:

Imagine there is no activity for 24 hours.  We accumulate a huge number
of allowed units and can exceed the rate limit for some time.
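
(The standard remedy that keeps a slice-less design is a token bucket: clamp
the banked credit so an idle period can accumulate at most a bounded burst.
A minimal sketch; the burst field is an illustrative addition, not something
proposed in this thread, and first-call handling is omitted for brevity:

    typedef struct {
        int64_t last_time;
        uint64_t speed;    /* bytes per second */
        uint64_t burst;    /* illustrative: max credit that may accumulate */
        int64_t allowed;
    } CappedRateLimit;

    static inline int64_t capped_calculate_delay(CappedRateLimit *limit, uint64_t n)
    {
        int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
        int64_t delta = now - limit->last_time;

        limit->last_time = now;
        limit->allowed += (delta * limit->speed) / 1000000000ULL;
        if (limit->allowed > (int64_t)limit->burst) {
            limit->allowed = (int64_t)limit->burst;  /* drop credit banked while idle */
        }
        limit->allowed -= n;
        if (limit->allowed < 0) {
            return ((uint64_t)-limit->allowed * 1000000000ULL) / limit->speed;
        }
        return 0;
    }

After a 24-hour pause the credit is capped at burst bytes instead of a day's
worth, so the job can exceed the configured rate only briefly.)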




Re: [Qemu-devel] rate limiting issues

2018-02-05 Thread Stefan Hajnoczi
On Fri, Feb 02, 2018 at 04:52:00PM -0500, John Snow wrote:
> CCing qemu-block and Berto

To avoid confusion: ratelimit.h is only used by block jobs.  It is
unrelated to throttle.h, which Berto maintains.

Unifying the two came up in the past as a cleanup task.  No one has
attempted it yet.




Re: [Qemu-devel] rate limiting issues

2018-02-02 Thread John Snow
CCing qemu-block and Berto

On 02/02/2018 06:10 AM, Wolfgang Bumiller wrote:
> Summary:
> Rate limit is effectively halved when the written chunks add up to only
> slightly exceeding the quota of a slice. This happens surprisingly reliably.
> 
> Explanation:
> The ratelimiting code in include/qemu/ratelimit.h currently uses slices with
> quotas. Finishing up the quota for one slice means it'll wait for the end of
> this _and_ the next slice before resetting the accounting and starting over.
> If that first slice was exceeded by only a tiny bit, we effectively spend
> every second slice waiting around before starting over.
> 
> Here if I use a limit of 3KiB/s I get 3KiB/s.
> Increasing the limit to 30700KiB/s gives me 30700KiB/s.
> Increasing it to 30720KiB/s reliably gives me 15000KiB/s.
> 
> Making it wait to the end of only the current slice means the excess data is
> not counted at all and we may go above the limit (though by at most one
> write-chunk, so I'm not sure if that's fine for most of the users; for backup
> jobs it seems to be 64k always).
> 
> I'd like to fix this and am unsure about which way to go. On the one hand I
> think the old code (before f14a39ccb922) may be fixable in a better way by not
> resetting the accounting completely but subtracting the amount of data the
> wait-period would have added.
> 
> At the same time, though, this could be simplified to not using slices but
> always comparing the amount of actually written data to the amount of data
> which should at most have been written.
> 
> Here are two approaches which seem to fix my issues:
> 
> --- Old code revised:
> 
> typedef struct {
>     int64_t next_slice_time;
>     uint64_t slice_quota;
>     uint64_t slice_ns;
>     int64_t dispatched;
> } RateLimit;
> 
> static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
> {
>     int64_t now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> 
>     assert(limit->slice_quota && limit->slice_ns);
> 
>     if (limit->next_slice_time == 0) { /* first call */
>         limit->dispatched = 0;
>         limit->next_slice_time = now + limit->slice_ns;
>         return 0;
>     }
> 
>     if (limit->next_slice_time < now) {
>         uint64_t passed_slices = DIV_ROUND_UP(now - limit->next_slice_time,
>                                               limit->slice_ns);
>         limit->next_slice_time = now + limit->slice_ns;
>         limit->dispatched -= passed_slices * limit->slice_quota;
>     }
>     limit->dispatched += n;
>     if (limit->dispatched+n <= limit->slice_quota) {
>         return 0;
>     }
>     return limit->next_slice_time - now;
> }
> 
> static inline void ratelimit_set_speed(RateLimit *limit, uint64_t speed,
>                                        uint64_t slice_ns)
> {
>     limit->slice_ns = slice_ns;
>     limit->slice_quota = MAX(((double)speed * slice_ns) / 1000000000ULL, 1);
> }
> 
> ---
> 
> And this is a short slice-less version. I wonder if there's any particular
> reason for sticking to slices?
> 
> --- Version without slices:
> 
> typedef struct {
>     int64_t last_time;
>     uint64_t speed;
>     int64_t allowed;
> } RateLimit;
> 
> static inline int64_t ratelimit_calculate_delay(RateLimit *limit, uint64_t n)
> {
>     int64_t delta, now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
> 
>     if (limit->last_time == 0) { /* first call */
>         limit->allowed = -n;
>         limit->last_time = now;
>         return (n * 1000000000ULL) / limit->speed;
>     }
> 
>     delta = (now - limit->last_time);
>     limit->allowed += (delta * limit->speed) / 1000000000ULL - n;
>     limit->last_time = now;
>     if (limit->allowed < 0) {
>         return ((uint64_t)-limit->allowed * 1000000000ULL) / limit->speed;
>     }
>     return 0;
> }
> 
> static inline void ratelimit_set_speed(RateLimit *limit, uint64_t speed,
>                                        uint64_t slice_ns)
> {
>     (void)slice_ns; // TODO: remove
>     limit->speed = speed;
> }
> 
> ---
> 
> Numerical note: a small delta means 'allowed' is incremented by 0, which
> should be fine since, when we hit the quota, we'll have a longer wait after
> which the delta is certainly big enough to produce positive values.
> (I tried larger and smaller values (1KiB/s to some MiB/s)).
> Alternatively we could set last_time and do the quota increment
> conditionally only when the delta is big enough, but I have not found
> this to be necessary in my tests.
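
(To make the note above concrete, with illustrative values not taken from the
original mail: at speed = 3072 bytes/s, a 100ns delta credits
100 * 3072 / 1000000000 = 0 bytes, because the integer division truncates.
But once the quota is hit, the wait is long: being one 1024-byte chunk over
budget yields a wait of 1024 * 1000000000 / 3072 ≈ 333333333ns, and the delta
after that wait credits 333333333 * 3072 / 1000000000 = 1023 bytes, so
truncation only ever drops sub-byte remainders.)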