On Mon, Aug 12, 2013 at 06:53:12PM +0200, Benoît Canet wrote:
> +#ifndef THROTTLING_H
> +#define THROTTLING_H

Should be THROTTLE_H.

> +
> +#include <stdint.h>
> +#include "qemu-common.h"
> +#include "qemu/timer.h"
> +
> +#define NANOSECONDS_PER_SECOND 1000000000.0
> +
> +#define BUCKETS_COUNT 6
> +
> +typedef enum {
> +    THROTTLE_BPS_TOTAL = 0,
> +    THROTTLE_BPS_READ = 1,
> +    THROTTLE_BPS_WRITE = 2,
> +    THROTTLE_OPS_TOTAL = 3,
> +    THROTTLE_OPS_READ = 4,
> +    THROTTLE_OPS_WRITE = 5,
> +} BucketType;
> +
> +typedef struct LeakyBucket {
> +    double ups; /* units per second */
> +    double max; /* leaky bucket max in units */
> +    double bucket; /* bucket in units */

These comments aren't very clear to me :). So I guess bps or iops would be in ups. Max would be the total budget or maximum burst. Bucket might be the current level.

> +} LeakyBucket;
> +
> +/* The following structure is used to configure a ThrottleState
> + * It contains a bit of state: the bucket field of the LeakyBucket structure.
> + * However it allows to keep the code clean and the bucket field is reset to
> + * zero at the right time.
> + */
> +typedef struct ThrottleConfig {
> +    LeakyBucket buckets[6]; /* leaky buckets */

s/6/THROTTLE_TYPE_MAX/

> +    uint64_t unit_size; /* size of an unit in bytes */
> +    uint64_t op_size; /* size of an operation in units */

It's not clear yet why we need both unit_size *and* op_size. I thought you would have a single granularity field for accounting big requests as multiple iops.

> +/* This function make a bucket leak
> + *
> + * @bkt: the bucket to make leak
> + * @delta: the time delta

delta is in nanoseconds. Probably best to call it delta_ns.

> +/* destroy a timer */
> +static void throttle_timer_destroy(QEMUTimer **timer)
> +{
> +    assert(*timer != NULL);
> +
> +    if (qemu_timer_pending(*timer)) {
> +        qemu_del_timer(*timer);
> +    }

You can always call qemu_del_timer(); the timer doesn't need to be pending.

> +/* fix bucket parameters */
> +static void throttle_fix_bucket(LeakyBucket *bkt)
> +{
> +    double min = bkt->ups / 10;
> +    /* zero bucket level */
> +    bkt->bucket = 0;
> +
> +    /* take care of not using cpu and also improve throttling precision */
> +    if (bkt->ups &&
> +        bkt->max < min) {
> +        bkt->max = min;
> +    }
> +}

This function seems like magic. What is really going on here? Why divide by 10 and when does this case happen?

> +
> +/* take care of canceling a timer */
> +static void throttle_cancel_timer(QEMUTimer *timer)
> +{
> +    assert(timer != NULL);
> +    if (!qemu_timer_pending(timer)) {
> +        return;
> +    }

No need to check pending first.