On 5/13/20 3:31 PM, [email protected] wrote:
> From: Tonghao Zhang <[email protected]>
> 
> For now, ovs-vswitchd use the array of the dp_meter struct
> to store meter's data, and at most, there are only 65536
> (defined by MAX_METERS) meters that can be used. But in some
> case, for example, in the edge gateway, we should use 200,000,
> at least, meters for IP address bandwidth limitation.
> Every one IP address will use two meters for its rx and tx
> path[1]. In other way, ovs-vswitchd should support meter-offload
> (rte_mtr_xxx api introduced by dpdk.), but there are more than
> 65536 meters in the hardware, such as Mellanox ConnectX-6.
> 
> This patch use array to manage the meter, but it can ben expanded.
> 
> [1].
> $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1
> $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0
> 
> Cc: Ilya Maximets <[email protected]>
> Cc: William Tu <[email protected]>
> Cc: Jarno Rajahalme <[email protected]>
> Cc: Ben Pfaff <[email protected]>
> Cc: Andy Zhou <[email protected]>
> Cc: Pravin Shelar <[email protected]>
> Signed-off-by: Tonghao Zhang <[email protected]>
> ---
> v2:
> * add comments for dp_meter_instance
> * change the log
> * remove extra newline
> * I don't move the dp_netdev_meter_init/destroy up. because
>   them depends other meters function and put all meter function
>   together may make the codes clean.
> ---

Hi.  Thanks for working on this!

This is not a full review, just a few things that I spotted on a quick glance.
I didn't review any thread safety/rcu aspects yet.

Best regards, Ilya Maximets.


>  lib/dpif-netdev.c | 319 ++++++++++++++++++++++++++++++++++++----------
>  1 file changed, 250 insertions(+), 69 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index ef14e83b5f06..b5deaab31eb0 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -98,9 +98,12 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
>  
>  /* Configuration parameters. */
>  enum { MAX_FLOWS = 65536 };     /* Maximum number of flows in flow table. */
> -enum { MAX_METERS = 65536 };    /* Maximum number of meters. */
> -enum { MAX_BANDS = 8 };         /* Maximum number of bands / meter. */
> -enum { N_METER_LOCKS = 64 };    /* Maximum number of meters. */
> +
> +/* Maximum number of meters in the table. */
> +#define METER_ENTRY_MAX (200000ULL)
> +/* Maximum number of bands / meter. */
> +#define METER_BAND_MAX  (8)
> +#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10)

Why we need to change enums to defines and also rename them?

>  
>  COVERAGE_DEFINE(datapath_drop_meter);
>  COVERAGE_DEFINE(datapath_drop_upcall_error);
> @@ -283,12 +286,26 @@ struct dp_meter {
>      uint16_t flags;
>      uint16_t n_bands;
>      uint32_t max_delta_t;
> +    uint32_t id;
> +    struct ovs_mutex lock;
>      uint64_t used;
>      uint64_t packet_count;
>      uint64_t byte_count;
>      struct dp_meter_band bands[];
>  };
>  
> +struct dp_meter_instance {
> +    uint32_t n_meters;

This should be called 'n_allocated' or smething like this.
'n_meters' makes me think that it's the number of actually used meters.

> +    /* Followed by struct dp_meter[n]; where n is the n_meters. */
> +    OVSRCU_TYPE(struct dp_meter *) dp_meters[];
> +};
> +
> +struct dp_meter_table {
> +    OVSRCU_TYPE(struct dp_meter_instance *) ti;

What does 'ti' mean?  I looked throught the code it always stands for meter 
instance,
but how 'meter instance' relates to 'ti'?  That is confusing.

> +    uint32_t count;

Why count is part of 'dp_meter_table'?  I think it should be part of 
'dp_meter_instance'
and named something like 'n_used', or actually 'n_meters'.

> +    struct ovs_mutex lock;
> +};

Why we need this structure at all?  Can it be just 3 fields inside struct 
dp_netdev?
Why it is table?  It's not a table. 'instance' is a table.  Confusing.

> +
>  struct pmd_auto_lb {
>      bool auto_lb_requested;     /* Auto load balancing requested by user. */
>      bool is_enabled;            /* Current status of Auto load balancing. */
> @@ -329,8 +346,7 @@ struct dp_netdev {
>      atomic_uint32_t tx_flush_interval;
>  
>      /* Meters. */
> -    struct ovs_mutex meter_locks[N_METER_LOCKS];
> -    struct dp_meter *meters[MAX_METERS]; /* Meter bands. */
> +    struct dp_meter_table meter_tbl;
>  
>      /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/
>      OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min;
> @@ -378,19 +394,6 @@ struct dp_netdev {
>      struct pmd_auto_lb pmd_alb;
>  };
>  
> -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]);
> -}
> -
> -
>  static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev 
> *dp,
>                                                      odp_port_t)
>      OVS_REQUIRES(dp->port_mutex);
> @@ -1523,6 +1526,9 @@ choose_port(struct dp_netdev *dp, const char *name)
>      return ODPP_NONE;
>  }
>  
> +static void dp_netdev_meter_init(struct dp_meter_table *tbl);
> +static void dp_netdev_meter_destroy(struct dp_meter_table *tbl);

These functions should be named dp_netdev_meter_table_{init,destroy}.

> +
>  static int
>  create_dp_netdev(const char *name, const struct dpif_class *class,
>                   struct dp_netdev **dpp)
> @@ -1556,9 +1562,7 @@ create_dp_netdev(const char *name, const struct 
> dpif_class *class,
>      dp->reconfigure_seq = seq_create();
>      dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq);
>  
> -    for (int i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_init_adaptive(&dp->meter_locks[i]);
> -    }
> +    dp_netdev_meter_init(&dp->meter_tbl);
>  
>      /* Disable upcalls by default. */
>      dp_netdev_disable_upcall(dp);
> @@ -1647,16 +1651,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp)
>      fat_rwlock_destroy(&dp->upcall_rwlock);
>  }
>  
> -static void
> -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id)
> -    OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS])
> -{
> -    if (dp->meters[meter_id]) {
> -        free(dp->meters[meter_id]);
> -        dp->meters[meter_id] = NULL;
> -    }
> -}
> -
>  /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp'
>   * through the 'dp_netdevs' shash while freeing 'dp'. */
>  static void
> @@ -1694,16 +1688,7 @@ dp_netdev_free(struct dp_netdev *dp)
>      /* Upcalls must be disabled at this point */
>      dp_netdev_destroy_upcall_lock(dp);
>  
> -    int i;
> -
> -    for (i = 0; i < MAX_METERS; ++i) {
> -        meter_lock(dp, i);
> -        dp_delete_meter(dp, i);
> -        meter_unlock(dp, i);
> -    }
> -    for (i = 0; i < N_METER_LOCKS; ++i) {
> -        ovs_mutex_destroy(&dp->meter_locks[i]);
> -    }
> +    dp_netdev_meter_destroy(&dp->meter_tbl);
>  
>      free(dp->pmd_cmask);
>      free(CONST_CAST(char *, dp->name));
> @@ -5713,14 +5698,197 @@ dp_netdev_disable_upcall(struct dp_netdev *dp)
>  
>  
>  /* Meters */
> +static uint32_t
> +meter_hash(struct dp_meter_instance *ti, uint32_t id)
> +{
> +    uint32_t n_meters = ti->n_meters;
> +
> +    return id % n_meters;
> +}

Why we need a hash here in this implementation?
Below code will be broken if meter_hash will hash different ids to the
same hash value.  There should be no hash or there should be good collision
protection.

> +
> +static void
> +dp_meter_free(struct dp_meter *meter)
> +{
> +    ovs_mutex_destroy(&meter->lock);
> +    free(meter);
> +}
> +
> +static struct dp_meter_instance *
> +dp_meter_instance_alloc(const uint32_t size)
> +{
> +    struct dp_meter_instance *ti;
> +
> +    ti = xzalloc(sizeof(*ti) + sizeof(struct dp_meter *) * size);

Don't parenthesize argument of sizeof if it's a variable.

> +    ti->n_meters = size;
> +
> +    return ti;
> +}
> +
> +static void
> +dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size)
> +{
> +    struct dp_meter_instance *new_ti;
> +    struct dp_meter_instance *ti;
> +    int n_meters;
> +    int i;
> +
> +    new_ti = dp_meter_instance_alloc(size);
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
> +    n_meters = MIN(size, ti->n_meters);
> +
> +    for (i = 0; i < n_meters; i++) {
> +        if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
> +            new_ti->dp_meters[i] = ti->dp_meters[i];
> +        }
> +    }
> +
> +    ovsrcu_set(&tbl->ti, new_ti);
> +    ovsrcu_postpone(free, ti);
> +}
> +
> +static void
> +dp_meter_instance_insert(struct dp_meter_instance *ti,

'dp_meter_instance_insert' sounds like we're going to create a new
dp_meter_instance and insert it to dp_meter_table, but it's not the
case.

> +                         struct dp_meter *meter)
> +{
> +    uint32_t hash;
> +
> +    hash = meter_hash(ti, meter->id);
> +    ovsrcu_set(&ti->dp_meters[hash], meter);
> +}
> +
> +static void
> +dp_meter_instance_remove(struct dp_meter_instance *ti,
> +                         struct dp_meter *meter)
> +{
> +    uint32_t hash;
> +
> +    hash = meter_hash(ti, meter->id);
> +    ovsrcu_set(&ti->dp_meters[hash], NULL);
> +}
> +
> +static void
> +dp_netdev_meter_init(struct dp_meter_table *tbl)
> +{
> +    struct dp_meter_instance *ti;
> +
> +    ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
> +    ovsrcu_set(&tbl->ti, ti);
> +
> +    ovs_mutex_init(&tbl->lock);
> +    tbl->count = 0;
> +}
> +
> +static void
> +dp_netdev_meter_destroy(struct dp_meter_table *tbl)

'dp_netdev_meter_destroy' sounds like we're going to destroy a single meter.

> +{
> +    struct dp_meter_instance *ti;
> +    int i;
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &tbl->ti);
> +    for (i = 0; i < ti->n_meters; i++) {
> +        struct dp_meter *meter;
> +
> +        meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[i]);
> +        if (meter) {
> +            ovsrcu_postpone(dp_meter_free, meter);
> +        }
> +    }
> +
> +    ovsrcu_postpone(free, ti);
> +    ovs_mutex_destroy(&tbl->lock);
> +}
> +
> +static struct dp_meter *
> +dp_meter_lookup(struct dp_meter_table *meter_tbl, uint32_t meter_id)
> +{
> +    struct dp_meter_instance *ti;
> +    struct dp_meter *meter;
> +    uint32_t hash;
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);

After gitting rcu protected pointer, you have to check if it's a valid pointer.

> +    hash = meter_hash(ti, meter_id);
> +
> +    meter = ovsrcu_get(struct dp_meter *, &ti->dp_meters[hash]);
> +    if (meter && meter->id == meter_id) {
> +        return meter;
> +    }
> +
> +    return NULL;
> +}
> +
> +static void
> +dp_meter_detach_free(struct dp_meter_table *meter_tbl, uint32_t meter_id)
> +                     OVS_REQUIRES(meter_tbl->lock)

Please, keep thread safety annotations indented with 4 spaces from the left.

> +{
> +    struct dp_meter_instance *ti;
> +    struct dp_meter *meter;
> +
> +    meter = dp_meter_lookup(meter_tbl, meter_id);
> +    if (!meter) {
> +        return;
> +    }
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> +    dp_meter_instance_remove(ti, meter);
> +    ovsrcu_postpone(dp_meter_free, meter);
> +
> +    meter_tbl->count--;
> +    /* Shrink the meter array if necessary. */
> +    if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
> +        meter_tbl->count <= (ti->n_meters / 4)) {
> +        int half_size = ti->n_meters / 2;
> +        int i;
> +
> +        /* Avoid hash collision, don't move slots to other place.
> +         * Make sure there are no references of meters in array
> +         * which will be released.
> +         */
> +        for (i = half_size; i < ti->n_meters; i++) {
> +            if (ovsrcu_get(struct dp_meter *, &ti->dp_meters[i])) {
> +                return;
> +            }
> +        }
> +
> +        dp_meter_instance_realloc(meter_tbl, half_size);
> +    }
> +}
> +
> +static int
> +dp_meter_attach(struct dp_meter_table *meter_tbl, struct dp_meter *meter)
> +                OVS_REQUIRES(meter_tbl->lock)

ditto.

> +{
> +    struct dp_meter_instance *ti;
> +    uint32_t hash;
> +
> +    ti = ovsrcu_get(struct dp_meter_instance *, &meter_tbl->ti);
> +    hash = meter_hash(ti, meter->id);
> +
> +    if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *,
> +                                &ti->dp_meters[hash]))) {
> +        VLOG_WARN("Failed to attach meter id %u to slot %u/%u.\n",
> +                  meter->id, hash, ti->n_meters);
> +        return EBUSY;

How this could happen if you're always calling _detach_free before calling 
attach?

> +    }
> +
> +    dp_meter_instance_insert(ti, meter);
> +
> +    meter_tbl->count++;
> +    if (meter_tbl->count >= ti->n_meters) {
> +        dp_meter_instance_realloc(meter_tbl, ti->n_meters * 2);
> +    }
> +
> +    return 0;
> +}
> +
>  static void
>  dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED,
>                                 struct ofputil_meter_features *features)
>  {
> -    features->max_meters = MAX_METERS;
> +    features->max_meters = METER_ENTRY_MAX;
>      features->band_types = DP_SUPPORTED_METER_BAND_TYPES;
>      features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK;
> -    features->max_bands = MAX_BANDS;
> +    features->max_bands = METER_BAND_MAX;
>      features->max_color = 0;
>  }
>  
> @@ -5742,14 +5910,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
> dp_packet_batch *packets_,
>      uint32_t exceeded_rate[NETDEV_MAX_BURST];
>      int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */
>  
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return;
>      }
>  
> -    meter_lock(dp, meter_id);
> -    meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
>      if (!meter) {
> -        goto out;
> +        return;
>      }
>  
>      /* Initialize as negative values. */
> @@ -5757,6 +5924,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
> dp_packet_batch *packets_,
>      /* Initialize as zeroes. */
>      memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate);
>  
> +    ovs_mutex_lock(&meter->lock);
>      /* All packets will hit the meter at the same time. */
>      long_delta_t = now / 1000 - meter->used / 1000; /* msec */
>  
> @@ -5874,8 +6042,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
> dp_packet_batch *packets_,
>              dp_packet_batch_refill(packets_, packet, j);
>          }
>      }
> - out:
> -    meter_unlock(dp, meter_id);
> +
> +    ovs_mutex_unlock(&meter->lock);
>  }
>  
>  /* Meter set/get/del processing is still single-threaded. */
> @@ -5884,11 +6052,12 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> ofproto_meter_id meter_id,
>                        struct ofputil_meter_config *config)
>  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
>      uint32_t mid = meter_id.uint32;
>      struct dp_meter *meter;
> -    int i;
> +    int err, i;
>  
> -    if (mid >= MAX_METERS) {
> +    if (mid >= METER_ENTRY_MAX) {
>          return EFBIG; /* Meter_id out of range. */
>      }
>  
> @@ -5896,7 +6065,7 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> ofproto_meter_id meter_id,
>          return EBADF; /* Unsupported flags set */
>      }
>  
> -    if (config->n_bands > MAX_BANDS) {
> +    if (config->n_bands > METER_BAND_MAX) {
>          return EINVAL;
>      }
>  
> @@ -5917,6 +6086,8 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> ofproto_meter_id meter_id,
>      meter->n_bands = config->n_bands;
>      meter->max_delta_t = 0;
>      meter->used = time_usec();
> +    meter->id = mid;
> +    ovs_mutex_init(&meter->lock);
>  
>      /* set up bands */
>      for (i = 0; i < config->n_bands; ++i) {
> @@ -5942,12 +6113,22 @@ dpif_netdev_meter_set(struct dpif *dpif, 
> ofproto_meter_id meter_id,
>          }
>      }
>  
> -    meter_lock(dp, mid);
> -    dp_delete_meter(dp, mid); /* Free existing meter, if any */
> -    dp->meters[mid] = meter;
> -    meter_unlock(dp, mid);
> +    ovs_mutex_lock(&meter_tbl->lock);
> +
> +    dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */

This doesn't look correct.  Why should we destroy some other meter to create
this new one?

> +    err = dp_meter_attach(meter_tbl, meter);
> +    if (err) {
> +        goto unlock_out;
> +    }
> +
> +    ovs_mutex_unlock(&meter_tbl->lock);
>  
>      return 0;
> +
> +unlock_out:
> +    ovs_mutex_unlock(&meter_tbl->lock);
> +    dp_meter_free(meter);
> +    return err;
>  }
>  
>  static int
> @@ -5955,23 +6136,23 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>                        ofproto_meter_id meter_id_,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)
>  {
> -    const struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_netdev *dp = get_dp_netdev(dpif);
>      uint32_t meter_id = meter_id_.uint32;
> -    int retval = 0;
> +    const struct dp_meter *meter;
>  
> -    if (meter_id >= MAX_METERS) {
> +    if (meter_id >= METER_ENTRY_MAX) {
>          return EFBIG;
>      }
>  
> -    meter_lock(dp, meter_id);
> -    const struct dp_meter *meter = dp->meters[meter_id];
> +    meter = dp_meter_lookup(&dp->meter_tbl, meter_id);
>      if (!meter) {
> -        retval = ENOENT;
> -        goto done;
> +        return ENOENT;
>      }
> +
>      if (stats) {
>          int i = 0;
>  
> +        ovs_mutex_lock(&meter->lock);
>          stats->packet_in_count = meter->packet_count;
>          stats->byte_in_count = meter->byte_count;
>  
> @@ -5979,13 +6160,12 @@ dpif_netdev_meter_get(const struct dpif *dpif,
>              stats->bands[i].packet_count = meter->bands[i].packet_count;
>              stats->bands[i].byte_count = meter->bands[i].byte_count;
>          }
> +        ovs_mutex_unlock(&meter->lock);
>  
>          stats->n_bands = i;
>      }
>  
> -done:
> -    meter_unlock(dp, meter_id);
> -    return retval;
> +    return 0;
>  }
>  
>  static int
> @@ -5994,15 +6174,16 @@ dpif_netdev_meter_del(struct dpif *dpif,
>                        struct ofputil_meter_stats *stats, uint16_t n_bands)
>  {
>      struct dp_netdev *dp = get_dp_netdev(dpif);
> +    struct dp_meter_table *meter_tbl = &dp->meter_tbl;
>      int error;
>  
>      error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands);
>      if (!error) {
>          uint32_t meter_id = meter_id_.uint32;
>  
> -        meter_lock(dp, meter_id);
> -        dp_delete_meter(dp, meter_id);
> -        meter_unlock(dp, meter_id);
> +        ovs_mutex_lock(&meter_tbl->lock);
> +        dp_meter_detach_free(meter_tbl, meter_id);
> +        ovs_mutex_unlock(&meter_tbl->lock);
>      }
>      return error;
>  }
> 

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to