Now this patch version is v3. and stay a long time. Any maintainer will continue to review this patch ? Thanks!
On Sat, May 23, 2020 at 6:33 PM <[email protected]> wrote: > > From: Tonghao Zhang <[email protected]> > > For now, ovs-vswitchd use the array of the dp_meter struct > to store meter's data, and at most, there are only 65536 > (defined by MAX_METERS) meters that can be used. But in some > case, for example, in the edge gateway, we should use 200,000, > at least, meters for IP address bandwidth limitation. > Every one IP address will use two meters for its rx and tx > path[1]. In other way, ovs-vswitchd should support meter-offload > (rte_mtr_xxx api introduced by dpdk.), but there are more than > 65536 meters in the hardware, such as Mellanox ConnectX-6. > > This patch use array to manage the meter, but it can ben expanded. > > [1]. > $ in_port=p0,ip,ip_dst=1.1.1.x action=meter:n,output:p1 > $ in_port=p1,ip,ip_src=1.1.1.x action=meter:m,output:p0 > > Cc: Ilya Maximets <[email protected]> > Cc: William Tu <[email protected]> > Cc: Jarno Rajahalme <[email protected]> > Cc: Ben Pfaff <[email protected]> > Cc: Andy Zhou <[email protected]> > Cc: Pravin Shelar <[email protected]> > Signed-off-by: Tonghao Zhang <[email protected]> > --- > v3: > * rename n_meters -> n_allocated in dp_meter_instance > * rename count -> n_used in dp_meter_table > * rename "ti" -> "meter_inst" or "inst" in different functions/struction > * remove parenthesize for sizeof > * rename dp_netdev_meter_destroy/init to dp_netdev_meter_table_destroy/init > * fix OVS_REQUIRES style > v2: > * add comments for dp_meter_instance > * change the log > * remove extra newline > * I don't move the dp_netdev_meter_init/destroy up. because > them depends other meters function and put all meter function > together may make the codes clean. > --- > lib/dpif-netdev.c | 319 ++++++++++++++++++++++++++++++++++++---------- > 1 file changed, 250 insertions(+), 69 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 51c888501bdf..920fef3ec572 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -99,9 +99,12 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0) > > /* Configuration parameters. */ > enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ > -enum { MAX_METERS = 65536 }; /* Maximum number of meters. */ > -enum { MAX_BANDS = 8 }; /* Maximum number of bands / meter. */ > -enum { N_METER_LOCKS = 64 }; /* Maximum number of meters. */ > + > +/* Maximum number of meters in the table. */ > +#define METER_ENTRY_MAX (200000ULL) > +/* Maximum number of bands / meter. */ > +#define METER_BAND_MAX (8) > +#define DP_METER_ARRAY_SIZE_MIN (1ULL << 10) > > COVERAGE_DEFINE(datapath_drop_meter); > COVERAGE_DEFINE(datapath_drop_upcall_error); > @@ -284,12 +287,26 @@ struct dp_meter { > uint16_t flags; > uint16_t n_bands; > uint32_t max_delta_t; > + uint32_t id; > + struct ovs_mutex lock; > uint64_t used; > uint64_t packet_count; > uint64_t byte_count; > struct dp_meter_band bands[]; > }; > > +struct dp_meter_instance { > + uint32_t n_allocated; > + /* Followed by struct dp_meter[n]; where n is the n_allocated. */ > + OVSRCU_TYPE(struct dp_meter *) dp_meters[]; > +}; > + > +struct dp_meter_table { > + OVSRCU_TYPE(struct dp_meter_instance *) meter_inst; > + uint32_t n_used; > + struct ovs_mutex lock; > +}; > + > struct pmd_auto_lb { > bool auto_lb_requested; /* Auto load balancing requested by user. */ > bool is_enabled; /* Current status of Auto load balancing. */ > @@ -330,8 +347,7 @@ struct dp_netdev { > atomic_uint32_t tx_flush_interval; > > /* Meters. */ > - struct ovs_mutex meter_locks[N_METER_LOCKS]; > - struct dp_meter *meters[MAX_METERS]; /* Meter bands. */ > + struct dp_meter_table meter_tbl; > > /* Probability of EMC insertions is a factor of 'emc_insert_min'.*/ > OVS_ALIGNED_VAR(CACHE_LINE_SIZE) atomic_uint32_t emc_insert_min; > @@ -379,19 +395,6 @@ struct dp_netdev { > struct pmd_auto_lb pmd_alb; > }; > > -static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id) > - OVS_ACQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS]) > -{ > - ovs_mutex_lock(&dp->meter_locks[meter_id % N_METER_LOCKS]); > -} > - > -static void meter_unlock(const struct dp_netdev *dp, uint32_t meter_id) > - OVS_RELEASES(dp->meter_locks[meter_id % N_METER_LOCKS]) > -{ > - ovs_mutex_unlock(&dp->meter_locks[meter_id % N_METER_LOCKS]); > -} > - > - > static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev > *dp, > odp_port_t) > OVS_REQUIRES(dp->port_mutex); > @@ -1524,6 +1527,9 @@ choose_port(struct dp_netdev *dp, const char *name) > return ODPP_NONE; > } > > +static void dp_netdev_meter_table_init(struct dp_meter_table *tbl); > +static void dp_netdev_meter_table_destroy(struct dp_meter_table *tbl); > + > static int > create_dp_netdev(const char *name, const struct dpif_class *class, > struct dp_netdev **dpp) > @@ -1557,9 +1563,7 @@ create_dp_netdev(const char *name, const struct > dpif_class *class, > dp->reconfigure_seq = seq_create(); > dp->last_reconfigure_seq = seq_read(dp->reconfigure_seq); > > - for (int i = 0; i < N_METER_LOCKS; ++i) { > - ovs_mutex_init_adaptive(&dp->meter_locks[i]); > - } > + dp_netdev_meter_table_init(&dp->meter_tbl); > > /* Disable upcalls by default. */ > dp_netdev_disable_upcall(dp); > @@ -1648,16 +1652,6 @@ dp_netdev_destroy_upcall_lock(struct dp_netdev *dp) > fat_rwlock_destroy(&dp->upcall_rwlock); > } > > -static void > -dp_delete_meter(struct dp_netdev *dp, uint32_t meter_id) > - OVS_REQUIRES(dp->meter_locks[meter_id % N_METER_LOCKS]) > -{ > - if (dp->meters[meter_id]) { > - free(dp->meters[meter_id]); > - dp->meters[meter_id] = NULL; > - } > -} > - > /* Requires dp_netdev_mutex so that we can't get a new reference to 'dp' > * through the 'dp_netdevs' shash while freeing 'dp'. */ > static void > @@ -1695,16 +1689,7 @@ dp_netdev_free(struct dp_netdev *dp) > /* Upcalls must be disabled at this point */ > dp_netdev_destroy_upcall_lock(dp); > > - int i; > - > - for (i = 0; i < MAX_METERS; ++i) { > - meter_lock(dp, i); > - dp_delete_meter(dp, i); > - meter_unlock(dp, i); > - } > - for (i = 0; i < N_METER_LOCKS; ++i) { > - ovs_mutex_destroy(&dp->meter_locks[i]); > - } > + dp_netdev_meter_table_destroy(&dp->meter_tbl); > > free(dp->pmd_cmask); > free(CONST_CAST(char *, dp->name)); > @@ -5714,14 +5699,197 @@ dp_netdev_disable_upcall(struct dp_netdev *dp) > > > /* Meters */ > +static uint32_t > +meter_hash(struct dp_meter_instance *inst, uint32_t id) > +{ > + uint32_t n_allocated = inst->n_allocated; > + > + return id % n_allocated; > +} > + > +static void > +dp_meter_free(struct dp_meter *meter) > +{ > + ovs_mutex_destroy(&meter->lock); > + free(meter); > +} > + > +static struct dp_meter_instance * > +dp_meter_instance_alloc(const uint32_t size) > +{ > + struct dp_meter_instance *inst; > + > + inst = xzalloc(sizeof *inst + sizeof(struct dp_meter *) * size); > + inst->n_allocated = size; > + > + return inst; > +} > + > +static void > +dp_meter_instance_realloc(struct dp_meter_table *tbl, const uint32_t size) > +{ > + struct dp_meter_instance *new_inst; > + struct dp_meter_instance *inst; > + int n_meters; > + int i; > + > + new_inst = dp_meter_instance_alloc(size); > + > + inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst); > + n_meters = MIN(size, inst->n_allocated); > + > + for (i = 0; i < n_meters; i++) { > + if (ovsrcu_get(struct dp_meter *, &inst->dp_meters[i])) { > + new_inst->dp_meters[i] = inst->dp_meters[i]; > + } > + } > + > + ovsrcu_set(&tbl->meter_inst, new_inst); > + ovsrcu_postpone(free, inst); > +} > + > +static void > +dp_meter_instance_insert(struct dp_meter_instance *inst, > + struct dp_meter *meter) > +{ > + uint32_t hash; > + > + hash = meter_hash(inst, meter->id); > + ovsrcu_set(&inst->dp_meters[hash], meter); > +} > + > +static void > +dp_meter_instance_remove(struct dp_meter_instance *inst, > + struct dp_meter *meter) > +{ > + uint32_t hash; > + > + hash = meter_hash(inst, meter->id); > + ovsrcu_set(&inst->dp_meters[hash], NULL); > +} > + > +static void > +dp_netdev_meter_table_init(struct dp_meter_table *tbl) > +{ > + struct dp_meter_instance *inst; > + > + inst = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN); > + ovsrcu_set(&tbl->meter_inst, inst); > + > + ovs_mutex_init(&tbl->lock); > + tbl->n_used = 0; > +} > + > +static void > +dp_netdev_meter_table_destroy(struct dp_meter_table *tbl) > +{ > + struct dp_meter_instance *inst; > + int i; > + > + inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst); > + for (i = 0; i < inst->n_allocated; i++) { > + struct dp_meter *meter; > + > + meter = ovsrcu_get(struct dp_meter *, &inst->dp_meters[i]); > + if (meter) { > + ovsrcu_postpone(dp_meter_free, meter); > + } > + } > + > + ovsrcu_postpone(free, inst); > + ovs_mutex_destroy(&tbl->lock); > +} > + > +static struct dp_meter * > +dp_meter_lookup(struct dp_meter_table *tbl, uint32_t meter_id) > +{ > + struct dp_meter_instance *meter_inst; > + struct dp_meter *meter; > + uint32_t hash; > + > + meter_inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst); > + hash = meter_hash(meter_inst, meter_id); > + > + meter = ovsrcu_get(struct dp_meter *, &meter_inst->dp_meters[hash]); > + if (meter && meter->id == meter_id) { > + return meter; > + } > + > + return NULL; > +} > + > +static void > +dp_meter_detach_free(struct dp_meter_table *tbl, uint32_t meter_id) > + OVS_REQUIRES(tbl->lock) > +{ > + struct dp_meter_instance *meter_inst; > + struct dp_meter *meter; > + > + meter = dp_meter_lookup(tbl, meter_id); > + if (!meter) { > + return; > + } > + > + meter_inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst); > + dp_meter_instance_remove(meter_inst, meter); > + ovsrcu_postpone(dp_meter_free, meter); > + > + tbl->n_used--; > + /* Shrink the meter array if necessary. */ > + if (meter_inst->n_allocated > DP_METER_ARRAY_SIZE_MIN && > + tbl->n_used <= (meter_inst->n_allocated / 4)) { > + int half_size = meter_inst->n_allocated / 2; > + int i; > + > + /* Avoid hash collision, don't move slots to other place. > + * Make sure there are no references of meters in array > + * which will be released. > + */ > + for (i = half_size; i < meter_inst->n_allocated; i++) { > + if (ovsrcu_get(struct dp_meter *, &meter_inst->dp_meters[i])) { > + return; > + } > + } > + > + dp_meter_instance_realloc(tbl, half_size); > + } > +} > + > +static int > +dp_meter_attach(struct dp_meter_table *tbl, struct dp_meter *meter) > + OVS_REQUIRES(tbl->lock) > +{ > + struct dp_meter_instance *meter_inst; > + uint32_t hash; > + > + meter_inst = ovsrcu_get(struct dp_meter_instance *, &tbl->meter_inst); > + hash = meter_hash(meter_inst, meter->id); > + > + if (OVS_UNLIKELY(ovsrcu_get(struct dp_meter *, > + &meter_inst->dp_meters[hash]))) { > + VLOG_WARN("Failed to attach meter id %u to slot %u/%u.\n", > + meter->id, hash, meter_inst->n_allocated); > + return EBUSY; > + } > + > + dp_meter_instance_insert(meter_inst, meter); > + > + tbl->n_used++; > + if (tbl->n_used >= meter_inst->n_allocated) { > + dp_meter_instance_realloc(tbl, meter_inst->n_allocated * 2); > + } > + > + return 0; > +} > + > static void > dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED, > struct ofputil_meter_features *features) > { > - features->max_meters = MAX_METERS; > + features->max_meters = METER_ENTRY_MAX; > features->band_types = DP_SUPPORTED_METER_BAND_TYPES; > features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK; > - features->max_bands = MAX_BANDS; > + features->max_bands = METER_BAND_MAX; > features->max_color = 0; > } > > @@ -5743,14 +5911,13 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct > dp_packet_batch *packets_, > uint32_t exceeded_rate[NETDEV_MAX_BURST]; > int exceeded_pkt = cnt; /* First packet that exceeded a band rate. */ > > - if (meter_id >= MAX_METERS) { > + if (meter_id >= METER_ENTRY_MAX) { > return; > } > > - meter_lock(dp, meter_id); > - meter = dp->meters[meter_id]; > + meter = dp_meter_lookup(&dp->meter_tbl, meter_id); > if (!meter) { > - goto out; > + return; > } > > /* Initialize as negative values. */ > @@ -5758,6 +5925,7 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct > dp_packet_batch *packets_, > /* Initialize as zeroes. */ > memset(exceeded_rate, 0, cnt * sizeof *exceeded_rate); > > + ovs_mutex_lock(&meter->lock); > /* All packets will hit the meter at the same time. */ > long_delta_t = now / 1000 - meter->used / 1000; /* msec */ > > @@ -5875,8 +6043,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct > dp_packet_batch *packets_, > dp_packet_batch_refill(packets_, packet, j); > } > } > - out: > - meter_unlock(dp, meter_id); > + > + ovs_mutex_unlock(&meter->lock); > } > > /* Meter set/get/del processing is still single-threaded. */ > @@ -5885,11 +6053,12 @@ dpif_netdev_meter_set(struct dpif *dpif, > ofproto_meter_id meter_id, > struct ofputil_meter_config *config) > { > struct dp_netdev *dp = get_dp_netdev(dpif); > + struct dp_meter_table *meter_tbl = &dp->meter_tbl; > uint32_t mid = meter_id.uint32; > struct dp_meter *meter; > - int i; > + int err, i; > > - if (mid >= MAX_METERS) { > + if (mid >= METER_ENTRY_MAX) { > return EFBIG; /* Meter_id out of range. */ > } > > @@ -5897,7 +6066,7 @@ dpif_netdev_meter_set(struct dpif *dpif, > ofproto_meter_id meter_id, > return EBADF; /* Unsupported flags set */ > } > > - if (config->n_bands > MAX_BANDS) { > + if (config->n_bands > METER_BAND_MAX) { > return EINVAL; > } > > @@ -5918,6 +6087,8 @@ dpif_netdev_meter_set(struct dpif *dpif, > ofproto_meter_id meter_id, > meter->n_bands = config->n_bands; > meter->max_delta_t = 0; > meter->used = time_usec(); > + meter->id = mid; > + ovs_mutex_init(&meter->lock); > > /* set up bands */ > for (i = 0; i < config->n_bands; ++i) { > @@ -5943,12 +6114,22 @@ dpif_netdev_meter_set(struct dpif *dpif, > ofproto_meter_id meter_id, > } > } > > - meter_lock(dp, mid); > - dp_delete_meter(dp, mid); /* Free existing meter, if any */ > - dp->meters[mid] = meter; > - meter_unlock(dp, mid); > + ovs_mutex_lock(&meter_tbl->lock); > + > + dp_meter_detach_free(meter_tbl, mid); /* Free existing meter, if any */ > + err = dp_meter_attach(meter_tbl, meter); > + if (err) { > + goto unlock_out; > + } > + > + ovs_mutex_unlock(&meter_tbl->lock); > > return 0; > + > +unlock_out: > + ovs_mutex_unlock(&meter_tbl->lock); > + dp_meter_free(meter); > + return err; > } > > static int > @@ -5956,23 +6137,23 @@ dpif_netdev_meter_get(const struct dpif *dpif, > ofproto_meter_id meter_id_, > struct ofputil_meter_stats *stats, uint16_t n_bands) > { > - const struct dp_netdev *dp = get_dp_netdev(dpif); > + struct dp_netdev *dp = get_dp_netdev(dpif); > uint32_t meter_id = meter_id_.uint32; > - int retval = 0; > + const struct dp_meter *meter; > > - if (meter_id >= MAX_METERS) { > + if (meter_id >= METER_ENTRY_MAX) { > return EFBIG; > } > > - meter_lock(dp, meter_id); > - const struct dp_meter *meter = dp->meters[meter_id]; > + meter = dp_meter_lookup(&dp->meter_tbl, meter_id); > if (!meter) { > - retval = ENOENT; > - goto done; > + return ENOENT; > } > + > if (stats) { > int i = 0; > > + ovs_mutex_lock(&meter->lock); > stats->packet_in_count = meter->packet_count; > stats->byte_in_count = meter->byte_count; > > @@ -5980,13 +6161,12 @@ dpif_netdev_meter_get(const struct dpif *dpif, > stats->bands[i].packet_count = meter->bands[i].packet_count; > stats->bands[i].byte_count = meter->bands[i].byte_count; > } > + ovs_mutex_unlock(&meter->lock); > > stats->n_bands = i; > } > > -done: > - meter_unlock(dp, meter_id); > - return retval; > + return 0; > } > > static int > @@ -5995,15 +6175,16 @@ dpif_netdev_meter_del(struct dpif *dpif, > struct ofputil_meter_stats *stats, uint16_t n_bands) > { > struct dp_netdev *dp = get_dp_netdev(dpif); > + struct dp_meter_table *meter_tbl = &dp->meter_tbl; > int error; > > error = dpif_netdev_meter_get(dpif, meter_id_, stats, n_bands); > if (!error) { > uint32_t meter_id = meter_id_.uint32; > > - meter_lock(dp, meter_id); > - dp_delete_meter(dp, meter_id); > - meter_unlock(dp, meter_id); > + ovs_mutex_lock(&meter_tbl->lock); > + dp_meter_detach_free(meter_tbl, meter_id); > + ovs_mutex_unlock(&meter_tbl->lock); > } > return error; > } > -- > 2.26.1 > -- Best regards, Tonghao _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
