On Sun, May 10, 2020 at 12:19 AM William Tu <[email protected]> wrote:
>
> On Thu, Apr 30, 2020 at 07:00:36PM +0800, [email protected] wrote:
> > From: Tonghao Zhang <[email protected]>
> >
> > For now, ovs-vswitchd use the array of the dp_meter struct
> > to store meter's data, and at most, there are only 65536
> > (defined by MAX_METERS) meters that can be used. But in some
> > case, for example, in the edge gateway, we should use 200,000,
> > at least, meters for IP address bandwidth limitation.
> > Every one IP address will use two meters for its rx and tx
> > path[1]. In other way, ovs-vswitchd should support meter-offload
> > (rte_mtr_xxx api introduced by dpdk.), but there are more than
> > 65536 meters in the hardware, such as Mellanox ConnectX-6.
> >
> > This patch use array to manage the meter, but it can ben expanded.
> >
> > [1].
> > $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> > $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0
>
> I'm just curious why you need so many 'unique' meters?
> can you share the meter id if their settings are the same?
Hi William, thanks for your review.
In our production environment, we use the OVS-DPDK as a gateway. In
that gateway, we
use the meter to limit bandwidth for inbound/outbound for every IP
address of public network.

In other gateway, we also use shared meter for SNAT case.
> ex:
> $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:X ,output:p1
> $ in_port=p1,ip,ip_src=1.1.1.x action=meter:X, output:p0
>
> if both flows have the same meter setup in X.
>
> >
> > Cc: Ilya Maximets <[email protected]>
> > Cc: William Tu <[email protected]>
> > Cc: Jarno Rajahalme <[email protected]>
> > Cc: Ben Pfaff <[email protected]>
> > Cc: Andy Zhou <[email protected]>
> > Signed-off-by: Tonghao Zhang <[email protected]>
> > ---
>
> Thanks for working on both kernel and userspace datapath!
> Also add Pravin in the loop.
>
> >  lib/dpif-netdev.c | 320 
> > ++++++++++++++++++++++++++++++++++++++++++------------
> >  1 file changed, 251 insertions(+), 69 deletions(-)
> >
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> > index ef14e83b5f06..17c0241aa2e2 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -98,9 +98,12 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
> >
> >  /* Configuration parameters. */
> >  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. 
> > */
> > -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> > -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> > -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> > +
> > +/* Maximum number of meters in the table. */
> > +#define METER_ENTRY_MAX (200000ULL)
> > +/* Maximum number of bands / meter. */
> > +#define METER_BAND_MAX  (8)
> > +#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10)
> >
> >  COVERAGE_DEFINE(datapath_drop_meter);
> >  COVERAGE_DEFINE(datapath_drop_upcall_error);
> > @@ -283,12 +286,25 @@ struct dp_meter {
> >      uint16_t flags;
> >      uint16_t n_bands;
> >      uint32_t max_delta_t;
> > +    uint32_t id;
> > +    struct ovs_mutex lock;
> >      uint64_t used;
> >      uint64_t packet_count;
> >      uint64_t byte_count;
> >      struct dp_meter_band bands[];
> >  };
> >
> > +struct dp_meter_instance {
> > +    uint32_t n_meters;
> > +    OVSRCU_TYPE(struct dp_meter *) dp_meters[];
> usually we add a comments here, saying
>         /* Followed by
>      *    struct dp_meter[n];
>      * where n is the n_meters.
>      */
OK
> > +};
> > +
> > +struct dp_meter_table {
> > +    OVSRCU_TYPE(struct dp_meter_instance *) ti;
> > +    uint32_t count;
> > +    struct ovs_mutex lock;
> > +};
> > +
> >  struct pmd_auto_lb {
> >      bool auto_lb_requested;     /* Auto load balancing requested by user. 
> > */
> >      bool is_enabled;            /* Current status of Auto load balancing. 
> > */
> > @@ -329,8 +345,7 @@ struct dp_netdev {
> >      atomic_uint32_t tx_flush_interval;
> >
> >      /* Meters. */
> > -    struct ovs_mutex meter_locks[N_METER_LOCKS];
>
> Why removing the multiple locks and using only one lock?
> Do you see any performance overhead when switching to sinlge lock?
As I know, without this patch, multiple meters may share the one lock.
N_METER_LOCKS == 64, but MAX_METERS == 65535.
so in my patch, every meter has itself lock to protect
stats/buckets/timestamp of that meter.

> > -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> > +    struct dp_meter_table meter_tbl;
> >
> >      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
> >      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> > @@ -378,19 +393,6 @@ struct dp_netdev {
> >      struct pmd_auto_lb pmd_alb;
> >  };
> >
> > -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > -}
> > -
> > -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> > -}
> > -
> > -
> >  static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev 
> > *dp,
> >                                                      odp_port_t)
> >      OVS_REQUIRES(dp->port_mutex);
> > @@ -1523,6 +1525,9 @@ choose_port(struct dp_netdev *dp, const char *name)
> >      return ODPP_NONE;
> >  }
> >
> > +static void dp_netdev_meter_init(struct dp_meter_table *tbl);
> > +static void dp_netdev_meter_destroy(struct dp_meter_table *tbl);
> Maybe move them up?
The function about meter, I put them together, include that
dp_netdev_meter_init/destroy as below.
But I am fine move them up.
> > +
> >  static int
> >  create_dp_netdev(const char *name, const struct dpif_class *class,
> >                   struct dp_netdev **dpp)
> > @@ -1556,9 +1561,7 @@ create_dp_netdev(const char *name, const struct 
> > dpif_class *class,
> >      dp->reconfigure_seq = seq_create();
> >      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
> >
> > -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> > -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> > -    }
> > +    dp_netdev_meter_init(&dp->meter_tbl);
> >
> >      /* Disable upcalls by default. */
> >      dp_netdev_disable_upcall(dp);
> > @@ -1647,16 +1650,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
> >      fat_rwlock_destroy(&dp->upcall_rwlock);
> >  }
> >
> > -static void
> > -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> > -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> > -{
> > -    if (dp->meters[meter_id]) {
> > -        free(dp->meters[meter_id]);
> > -        dp->meters[meter_id] = NULL;
> > -    }
> > -}
> > -
> >  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
> >   * through the 'dp_netdevs' shash while freeing 'dp'. */
> >  static void
> > @@ -1694,16 +1687,7 @@ dp_netdev_free(struct dp_netdev *dp)
> >      /* Upcalls must be disabled at this point */
> >      dp_netdev_destroy_upcall_lock(dp);
> >
> > -    int i;
> > -
> > -    for (i = 0; i < MAX_METERS; ++i) {
> > -        meter_lock(dp, i);
> > -        dp_delete_meter(dp, i);
> > -        meter_unlock(dp, i);
> > -    }
> > -    for (i = 0; i < N_METER_LOCKS; ++i) {
> > -        ovs_mutex_destroy(&dp->meter_locks[i]);
> > -    }
> > +    dp_netdev_meter_destroy(&dp->meter_tbl);
> >
> >      free(dp->pmd_cmask);
> >      free(CONST_CAST(char *, dp->name));
> > @@ -5713,14 +5697,197 @@ dp_netdev_disable_upcall(struct dp_netdev *dp)
> >
> >
> >  /* Meters */
> > +static uint32_t
> > +meter_hash(struct dp_meter_instance *ti, uint32_t id)
> > +{
> > +    uint32_t n_meters = ti->n_meters;
> > +
> > +    return id % n_meters;
> > +}
> > +
> > +static void
> > +dp_meter_free(struct dp_meter *meter)
> > +{
> > +    ovs_mutex_destroy(&meter->lock);
> > +    free(meter);
> > +}
> > +
> > +static struct dp_meter_instance *
> > +dp_meter_instance_alloc(const uint32_t size)
> > +{
> > +    struct dp_meter_instance *ti;
> > +
> > +    ti = xzalloc(sizeof(*ti) + sizeof(struct dp_meter *) * size);
> > +    ti->n_meters = size;
> > +
> > +    return ti;
> > +}
> > +
> > +static void
> > +dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size)
> > +{
> > +    struct dp_meter_instance *new_ti;
> > +    struct dp_meter_instance *ti;
> > +    int n_meters;
> > +    int i;
> > +
> > +    new_ti = dp_meter_instance_alloc(size);
> > +
> > +    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
> > +    n_meters = MIN(size, ti->n_meters);
> > +
> > +    for (i = 0; i < n_meters; i++) {
> > +        if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
> > +            new_ti->dp_meters[i] = ti->dp_meters[i];
> > +        }
> > +    }
> > +
> > +    ovsrcu_set(&tbl->ti, new_ti);
> > +    ovsrcu_postpone(free, ti);
> > +}
> > +
> > +static void
> > +dp_meter_instance_insert(struct dp_meter_instance *ti,
> > +                         struct dp_meter *meter)
> > +{
> > +    uint32_t hash;
> > +
> > +    hash = meter_hash(ti, meter->id);
> > +    ovsrcu_set(&ti->dp_meters[hash], meter);
> > +}
> > +
> > +static void
> > +dp_meter_instance_remove(struct dp_meter_instance *ti,
> > +                         struct dp_meter *meter)
> > +{
> > +    uint32_t hash;
> > +
> > +    hash = meter_hash(ti, meter->id);
> > +    ovsrcu_set(&ti->dp_meters[hash], NULL);
> > +}
> > +
> > +static void
> > +dp_netdev_meter_init(struct dp_meter_table *tbl)
> > +{
> > +    struct dp_meter_instance *ti;
> > +
> > +    ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
> > +    ovsrcu_set(&tbl->ti, ti);
> > +
> > +    ovs_mutex_init(&tbl->lock);
> > +    tbl->count = 0;
> > +}
> > +
> > +static void
> > +dp_netdev_meter_destroy(struct dp_meter_table *tbl)
> > +{
> > +    struct dp_meter_instance *ti;
> > +    int i;
> > +
> > +    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
> > +    for (i = 0; i < ti->n_meters; i++) {
> > +        struct dp_meter *meter;
> > +
> > +        meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[i]);
> > +        if (meter) {
> > +            ovsrcu_postpone(dp_meter_free, meter);
> > +        }
> > +    }
> > +
> > +    ovsrcu_postpone(free, ti);
> > +    ovs_mutex_destroy(&tbl->lock);
> > +}
> > +
> > +static struct dp_meter *
> > +dp_meter_lookup(struct dp_meter_table *meter_tbl, uint32_t meter_id)
> > +{
> > +    struct dp_meter_instance *ti;
> > +    struct dp_meter *meter;
> > +    uint32_t hash;
> > +
> > +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> > +    hash = meter_hash(ti, meter_id);
> > +
> > +    meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[hash]);
> > +    if (meter && meter->id == meter_id) {
> > +        return meter;
> > +    }
> > +
> > +    return NULL;
> > +}
> > +
> > +static void
> > +dp_meter_detach_free(struct dp_meter_table *meter_tbl, uint32_t meter_id)
> > +                     OVS_REQUIRES(meter_tbl->lock)
> > +{
> > +    struct dp_meter_instance *ti;
> > +    struct dp_meter *meter;
> > +
> > +    meter = dp_meter_lookup(meter_tbl, meter_id);
> > +    if (!meter) {
> > +        return;
> > +    }
> > +
> > +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> > +    dp_meter_instance_remove(ti, meter);
> > +    ovsrcu_postpone(dp_meter_free, meter);
> > +
> > +    meter_tbl->count--;
> > +    /* Shrink the meter array if necessary. */
> > +    if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
> > +        meter_tbl->count <= (ti->n_meters / 4)) {
> > +        int half_size = ti->n_meters / 2;
> > +        int i;
> > +
> > +        /* Avoid hash collision, don't move slots to other place.
> > +         * Make sure there are no references of meters in array
> > +         * which will be released.
> > +         */
> > +        for (i = half_size; i < ti->n_meters; i++) {
> > +            if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
> > +                return;
> > +            }
> > +        }
> > +
> > +        dp_meter_instance_realloc(meter_tbl, half_size);
> > +    }
> > +}
> > +
> > +static int
> > +dp_meter_attach(struct dp_meter_table *meter_tbl, struct dp_meter *meter)
> > +                OVS_REQUIRES(meter_tbl->lock)
> > +{
> > +    struct dp_meter_instance *ti;
> > +    uint32_t hash;
> > +
> > +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> > +    hash = meter_hash(ti, meter->id);
> > +
> > +    if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *,
> > +                                &ti->dp_meters[hash]))) {
> > +        VLOG_WARN("When installing meter to %u/%u slot, but it's used.\n",
> > +                  hash, ti->n_meters);
> how about
> VLOG_WANR("Failed to attach meter id %u to slot %u/%u.\n"
OK
> > +        return EBUSY;
> > +    }
> > +
> > +    dp_meter_instance_insert(ti, meter);
> > +
> > +    meter_tbl->count++;
> > +    if (meter_tbl->count >= ti->n_meters) {
> > +        dp_meter_instance_realloc(meter_tbl, ti->n_meters *2);
> space missing between "*2"
OK
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> >  static void
> >  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
> >                                 struct ofputil_meter_features *features)
> >  {
> > -    features->max_meters = MAX_METERS;
> > +    features->max_meters = METER_ENTRY_MAX;
> >      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
> >      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> > -    features->max_bands = MAX_BANDS;
> > +    features->max_bands = METER_BAND_MAX;
> >      features->max_color = 0;
> >  }
> >
> > @@ -5742,14 +5909,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
> > dp_packet_batch *packets_,
> >      uint32_t exceeded_rate[NETDEV_MAX_BURST];
> >      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
> >
> > -    if (meter_id >= MAX_METERS) {
> > +    if (meter_id >= METER_ENTRY_MAX) {
> >          return;
> >      }
> >
> > -    meter_lock(dp, meter_id);
> > -    meter = dp->meters[meter_id];
> > +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
> >      if (!meter) {
> > -        goto out;
> > +        return;
> >      }
> >
> >      /* Initialize as negative values. */
> > @@ -5757,6 +5923,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
> > dp_packet_batch *packets_,
> >      /* Initialize as zeroes. */
> >      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
> >
> > +    ovs_mutex_lock(&meter->lock);
>
> do you want to use multiple lock, like the current design?
As explain above, there should not be a performance drop.
> >      /* All packets will hit the meter at the same time. */
> >      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
> >
> > @@ -5874,8 +6041,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
> > dp_packet_batch *packets_,
> >              dp_packet_batch_refill(packets_, packet, j);
> >          }
> >      }
> > - out:
> > -    meter_unlock(dp, meter_id);
> > +
> > +    ovs_mutex_unlock(&meter->lock);
> >  }
> >
> >  /* Meter set/get/del processing is still single-threaded. */
> > @@ -5884,11 +6051,12 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> > ofproto_meter_id meter_id,
> >                        struct ofputil_meter_config *config)
> >  {
> >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
> >      uint32_t mid = meter_id.uint32;
> >      struct dp_meter *meter;
> > -    int i;
> > +    int err, i;
> >
> > -    if (mid >= MAX_METERS) {
> > +    if (mid >= METER_ENTRY_MAX) {
> >          return EFBIG; /* Meter_id out of range. */
> >      }
> >
> > @@ -5896,7 +6064,7 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> > ofproto_meter_id meter_id,
> >          return EBADF; /* Unsupported flags set */
> >      }
> >
> > -    if (config->n_bands > MAX_BANDS) {
> > +    if (config->n_bands > METER_BAND_MAX) {
> >          return EINVAL;
> >      }
> >
> > @@ -5917,6 +6085,8 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> > ofproto_meter_id meter_id,
> >      meter->n_bands = config->n_bands;
> >      meter->max_delta_t = 0;
> >      meter->used = time_usec();
> > +    meter->id = mid;
> > +    ovs_mutex_init(&meter->lock);
> >
> >      /* set up bands */
> >      for (i = 0; i < config->n_bands; ++i) {
> > @@ -5942,12 +6112,22 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> > ofproto_meter_id meter_id,
> >          }
> >      }
> >
> > -    meter_lock(dp, mid);
> > -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> > -    dp->meters[mid] = meter;
> > -    meter_unlock(dp, mid);
> > +    ovs_mutex_lock(&meter_tbl->lock);
> > +
> > +    dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */
> > +    err = dp_meter_attach(meter_tbl, meter);
> > +    if (err) {
> > +        goto unlock_out;
> > +    }
> > +
> > +    ovs_mutex_unlock(&meter_tbl->lock);
> >
> >      return 0;
> > +
> > +unlock_out:
> > +    ovs_mutex_unlock(&meter_tbl->lock);
> > +    dp_meter_free(meter);
> > +    return err;
> >  }
> >
> >  static int
> > @@ -5955,23 +6135,24 @@ dpif_netdev_meter_get(const struct dpif *dpif,
> >                        ofproto_meter_id meter_id_,
> >                        struct ofputil_meter_stats *stats, uint16_t n_bands)
> >  {
> > -    const struct dp_netdev *dp = get_dp_netdev(dpif);
> We can still keep the 'const'
>
> > +    struct dp_netdev *dp = get_dp_netdev(dpif);
> >      uint32_t meter_id = meter_id_.uint32;
> > -    int retval = 0;
> > +    const struct dp_meter *meter;
> >
> > -    if (meter_id >= MAX_METERS) {
> > +    if (meter_id >= METER_ENTRY_MAX) {
> >          return EFBIG;
> >      }
> >
> > -    meter_lock(dp, meter_id);
> > -    const struct dp_meter *meter = dp->meters[meter_id];
> > +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
> >      if (!meter) {
> > -        retval = ENOENT;
> > -        goto done;
> > +        return ENOENT;
> >      }
> > +
> >      if (stats) {
> >          int i = 0;
> >
> > +        ovs_mutex_lock(&meter->lock);
> > +
> remove extra newline
OK
> >          stats->packet_in_count = meter->packet_count;
> >          stats->byte_in_count = meter->byte_count;
> >
> > @@ -5980,12 +6161,12 @@ dpif_netdev_meter_get(const struct dpif *dpif,
> >              stats->bands[i].byte_count = meter->bands[i].byte_count;
> >          }
> >
> > +        ovs_mutex_unlock(&meter->lock);
> > +
> >          stats->n_bands = i;
> >      }
> >
> > -done:
> > -    meter_unlock(dp, meter_id);
> > -    return retval;
> > +    return 0;
> >  }
> >
> >  static int
> > @@ -5994,15 +6175,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
> >                        struct ofputil_meter_stats *stats, uint16_t n_bands)
> >  {
> >      struct dp_netdev *dp = get_dp_netdev(dpif);
> > +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
> >      int error;
> >
> >      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
> >      if (!error) {
> >          uint32_t meter_id = meter_id_.uint32;
> >
> > -        meter_lock(dp, meter_id);
> > -        dp_delete_meter(dp, meter_id);
> > -        meter_unlock(dp, meter_id);
> > +        ovs_mutex_lock(&meter_tbl->lock);
> > +        dp_meter_detach_free(meter_tbl, meter_id);
> > +        ovs_mutex_unlock(&meter_tbl->lock);
> >      }
> >      return error;
> >  }
> > --
> > 1.8.3.1
> >
> Thanks,
> William



-- 
Best regards, Tonghao
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to