When adding support for OpenFlow group and bucket stats, a group entry is added to the xlate_cache. If the main thread removes the group from an ofproto, we need to guarantee that the group remains accessible to users of xlate_group_actions and the xlate_cache, as the xlate_cache will not be cleaned up until the corresponding datapath flows are revalidated.
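Concretely, the lifetime rules this patch establishes look roughly like the
sketch below (reader side only; the names are those introduced by this patch,
and the translation work itself is elided):

    struct ofgroup *group;

    if (ofproto_group_lookup(ofproto, group_id, &group)) {
        /* A reference is now held: the group's fields are read-only and
         * remain valid even if the main thread concurrently removes the
         * group from the ofproto's hash map. */
        ... translate using the group's buckets ...

        /* Dropping the last reference does not free the group directly;
         * ofproto_group_unref() defers destruction to an RCU grace period
         * via ovsrcu_postpone(group_destroy_cb, group). */
        ofproto_group_unref(group);
    }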
To make modify_group compatible with group reference counting, the group is
re-created and then replaces the old group in ofproto's ofgroup hash map.
Thus, the group is never altered while users of the xlate module hold a
pointer to the group. This also eliminates the need for ofgroup's rwlock,
since all properties of an ofgroup are now read-only.

Signed-off-by: Ryan Wilson <wr...@nicira.com>
---
v2: Fixed a bug with group stats for all buckets, cleaned up the ofgroup
    unref code, and added Andy Zhou's test for more complete test coverage.
v3: Split group reference counting and support for group and bucket stats
    into 2 patches. Committed Andy Zhou's test patch separately.
v4: Addressed Joe's comments, making it clearer why the group refcount is
    needed for the xlate cache.
v5: Added Joe's comments to the git commit message for more accuracy
    regarding the xlate cache; edited modify_group in ofproto.c to be
    compatible with group reference counting.
---
 ofproto/ofproto-dpif-xlate.c |    4 +-
 ofproto/ofproto-dpif.c       |   22 ++---
 ofproto/ofproto-dpif.h       |   16 ++++
 ofproto/ofproto-provider.h   |   25 +++---
 ofproto/ofproto.c            |  195 +++++++++++++++++++++++-------------------
 5 files changed, 143 insertions(+), 119 deletions(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index a87db54..2095f37 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -854,7 +854,7 @@ group_is_alive(const struct xlate_ctx *ctx, uint32_t group_id, int depth)
 
     hit = group_first_live_bucket(ctx, group, depth) != NULL;
 
-    group_dpif_release(group);
+    group_dpif_unref(group);
     return hit;
 }
 
@@ -2202,7 +2202,7 @@ xlate_group_action__(struct xlate_ctx *ctx, struct group_dpif *group)
     default:
         OVS_NOT_REACHED();
     }
-    group_dpif_release(group);
+    group_dpif_unref(group);
 
     ctx->in_group = false;
 }
 
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 7fdd71f..c79602c 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -3596,18 +3596,9 @@ group_destruct(struct ofgroup *group_)
 }
 
 static enum ofperr
-group_modify(struct ofgroup *group_, struct ofgroup *victim_)
+group_modify(struct ofgroup *group_)
 {
     struct ofproto_dpif *ofproto = ofproto_dpif_cast(group_->ofproto);
-    struct group_dpif *group = group_dpif_cast(group_);
-    struct group_dpif *victim = group_dpif_cast(victim_);
-
-    ovs_mutex_lock(&group->stats_mutex);
-    if (victim->up.n_buckets < group->up.n_buckets) {
-        group_destruct__(group);
-    }
-    group_construct_stats(group);
-    ovs_mutex_unlock(&group->stats_mutex);
 
     ofproto->backer->need_revalidate = REV_FLOW_TABLE;
 
@@ -3629,6 +3620,10 @@ group_get_stats(const struct ofgroup *group_, struct ofputil_group_stats *ogs)
     return 0;
 }
 
+/* If the group exists, this function increments the group's reference count.
+ *
+ * Make sure to call group_dpif_unref() after no longer needing to maintain
+ * a reference to the group. */
 bool
 group_dpif_lookup(struct ofproto_dpif *ofproto, uint32_t group_id,
                   struct group_dpif **group)
@@ -3645,13 +3640,6 @@
 }
 
 void
-group_dpif_release(struct group_dpif *group)
-    OVS_RELEASES(group->up.rwlock)
-{
-    ofproto_group_release(&group->up);
-}
-
-void
 group_dpif_get_buckets(const struct group_dpif *group,
                        const struct list **buckets)
 {
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index e49616e..345117a 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -232,6 +232,22 @@ BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);
 
 /* struct rule_dpif has struct rule as it's first member. */
 #define RULE_CAST(RULE) ((struct rule *)RULE)
+#define GROUP_CAST(GROUP) ((struct ofgroup *)GROUP)
+
+static inline struct group_dpif* group_dpif_ref(struct group_dpif *group)
+{
+    if (group) {
+        ofproto_group_ref(GROUP_CAST(group));
+    }
+    return group;
+}
+
+static inline void group_dpif_unref(struct group_dpif *group)
+{
+    if (group) {
+        ofproto_group_unref(GROUP_CAST(group));
+    }
+}
 
 static inline void rule_dpif_ref(struct rule_dpif *rule)
 {
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 141adec..7a52b13 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -474,20 +474,22 @@ bool ofoperation_has_out_port(const struct ofoperation *, ofp_port_t out_port)
  * With few exceptions, ofproto implementations may look at these fields but
  * should not modify them. */
 struct ofgroup {
-    /* The rwlock is used to prevent groups from being deleted while child
-     * threads are using them to xlate flows. A read lock means the
-     * group is currently being used. A write lock means the group is
-     * in the process of being deleted or updated. Note that since
-     * a read lock on the groups container is held while searching, and
-     * a group is ever write locked only while holding a write lock
-     * on the container, the user's of groups will never face a group
-     * in the write locked state. */
-    struct ovs_rwlock rwlock OVS_ACQ_AFTER(ofproto_mutex);
     struct hmap_node hmap_node; /* In struct ofproto's "groups" hmap. */
     struct ofproto *ofproto;    /* The ofproto that contains this group. */
     uint32_t group_id;
     enum ofp11_group_type type; /* One of OFPGT_*. */
+
+    /* Number of references.
+     *
+     * This is needed to keep track of references to the group in the xlate
+     * module.
+     *
+     * If the main thread removes the group from an ofproto, we need to
+     * guarantee that the group remains accessible to users of
+     * xlate_group_actions and the xlate_cache, as the xlate_cache will not be
+     * cleaned up until the corresponding datapath flows are revalidated. */
+    struct ovs_refcount ref_count;
 
     long long int created;     /* Creation time. */
     long long int modified;    /* Time of last modification. */
@@ -502,6 +504,9 @@ bool ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
 void ofproto_group_release(struct ofgroup *group)
     OVS_RELEASES(group->rwlock);
 
+void ofproto_group_ref(struct ofgroup *);
+void ofproto_group_unref(struct ofgroup *);
+
 /* ofproto class structure, to be defined by each ofproto implementation.
  *
  *
@@ -1684,7 +1689,7 @@ struct ofproto_class {
     void (*group_destruct)(struct ofgroup *);
     void (*group_dealloc)(struct ofgroup *);
 
-    enum ofperr (*group_modify)(struct ofgroup *, struct ofgroup *victim);
+    enum ofperr (*group_modify)(struct ofgroup *);
 
     enum ofperr (*group_get_stats)(const struct ofgroup *,
                                    struct ofputil_group_stats *);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index e728921..fb0244a 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -2647,6 +2647,30 @@ ofproto_rule_unref(struct rule *rule)
     }
 }
 
+static void
+group_destroy_cb(struct ofgroup *group)
+{
+    group->ofproto->ofproto_class->group_destruct(group);
+    ofputil_bucket_list_destroy(&group->buckets);
+    group->ofproto->ofproto_class->group_dealloc(group);
+}
+
+void
+ofproto_group_ref(struct ofgroup *group)
+{
+    if (group) {
+        ovs_refcount_ref(&group->ref_count);
+    }
+}
+
+void
+ofproto_group_unref(struct ofgroup *group)
+{
+    if (group && ovs_refcount_unref(&group->ref_count) == 1) {
+        ovsrcu_postpone(group_destroy_cb, group);
+    }
+}
+
 static uint32_t get_provider_meter_id(const struct ofproto *,
                                       uint32_t of_meter_id);
 
@@ -5334,16 +5358,19 @@ handle_meter_request(struct ofconn *ofconn, const struct ofp_header *request,
     return 0;
 }
 
+/* If the group exists, this function increments the group's reference count.
+ *
+ * Make sure to call ofproto_group_unref() after no longer needing to maintain
+ * a reference to the group. */
 bool
 ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
                      struct ofgroup **group)
-    OVS_TRY_RDLOCK(true, (*group)->rwlock)
 {
     ovs_rwlock_rdlock(&ofproto->groups_rwlock);
     HMAP_FOR_EACH_IN_BUCKET (*group, hmap_node,
                              hash_int(group_id, 0), &ofproto->groups) {
         if ((*group)->group_id == group_id) {
-            ovs_rwlock_rdlock(&(*group)->rwlock);
+            ofproto_group_ref(*group);
             ovs_rwlock_unlock(&ofproto->groups_rwlock);
             return true;
         }
@@ -5352,24 +5379,16 @@ ofproto_group_lookup(const struct ofproto *ofproto, uint32_t group_id,
     return false;
 }
 
-void
-ofproto_group_release(struct ofgroup *group)
-    OVS_RELEASES(group->rwlock)
-{
-    ovs_rwlock_unlock(&group->rwlock);
-}
-
 static bool
 ofproto_group_write_lookup(const struct ofproto *ofproto, uint32_t group_id,
                            struct ofgroup **group)
     OVS_TRY_WRLOCK(true, ofproto->groups_rwlock)
-    OVS_TRY_WRLOCK(true, (*group)->rwlock)
 {
     ovs_rwlock_wrlock(&ofproto->groups_rwlock);
     HMAP_FOR_EACH_IN_BUCKET (*group, hmap_node,
                              hash_int(group_id, 0), &ofproto->groups) {
         if ((*group)->group_id == group_id) {
-            ovs_rwlock_wrlock(&(*group)->rwlock);
+            ofproto_group_ref(*group);
            return true;
         }
     }
@@ -5432,7 +5451,6 @@ group_get_ref_count(struct ofgroup *group)
 
 static void
 append_group_stats(struct ofgroup *group, struct list *replies)
-    OVS_REQ_RDLOCK(group->rwlock)
 {
     struct ofputil_group_stats ogs;
     struct ofproto *ofproto = group->ofproto;
@@ -5476,15 +5494,15 @@ handle_group_request(struct ofconn *ofconn,
     if (group_id == OFPG_ALL) {
         ovs_rwlock_rdlock(&ofproto->groups_rwlock);
         HMAP_FOR_EACH (group, hmap_node, &ofproto->groups) {
-            ovs_rwlock_rdlock(&group->rwlock);
+            ofproto_group_ref(group);
             cb(group, &replies);
-            ovs_rwlock_unlock(&group->rwlock);
+            ofproto_group_unref(group);
         }
         ovs_rwlock_unlock(&ofproto->groups_rwlock);
     } else {
         if (ofproto_group_lookup(ofproto, group_id, &group)) {
             cb(group, &replies);
-            ofproto_group_release(group);
+            ofproto_group_unref(group);
         }
     }
     ofconn_send_replies(ofconn, &replies);
@@ -5584,6 +5602,43 @@ handle_queue_get_config_request(struct ofconn *ofconn,
     return 0;
 }
 
+static enum ofperr
+init_group(struct ofproto *ofproto, struct ofputil_group_mod *gm,
+           struct ofgroup **ofgroup)
+{
+    enum ofperr error;
+
+    if (gm->group_id > OFPG_MAX) {
+        return OFPERR_OFPGMFC_INVALID_GROUP;
+    }
+    if (gm->type > OFPGT11_FF) {
+        return OFPERR_OFPGMFC_BAD_TYPE;
+    }
+
+    *ofgroup = ofproto->ofproto_class->group_alloc();
+    if (!*ofgroup) {
+        VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name);
+        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
+    }
+
+    (*ofgroup)->ofproto = ofproto;
+    (*ofgroup)->group_id = gm->group_id;
+    (*ofgroup)->type = gm->type;
+    (*ofgroup)->created = (*ofgroup)->modified = time_msec();
+    ovs_refcount_init(&(*ofgroup)->ref_count);
+
+    list_move(&(*ofgroup)->buckets, &gm->buckets);
+    (*ofgroup)->n_buckets = list_size(&(*ofgroup)->buckets);
+
+    /* Construct called BEFORE any locks are held. */
+    error = ofproto->ofproto_class->group_construct(*ofgroup);
+    if (error) {
+        ofputil_bucket_list_destroy(&(*ofgroup)->buckets);
+        ofproto->ofproto_class->group_dealloc(*ofgroup);
+    }
+    return error;
+}
+
 /* Implements OFPGC11_ADD
  * in which no matching flow already exists in the flow table.
  *
@@ -5603,33 +5658,10 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
     struct ofgroup *ofgroup;
     enum ofperr error;
 
-    if (gm->group_id > OFPG_MAX) {
-        return OFPERR_OFPGMFC_INVALID_GROUP;
-    }
-    if (gm->type > OFPGT11_FF) {
-        return OFPERR_OFPGMFC_BAD_TYPE;
-    }
-
     /* Allocate new group and initialize it. */
-    ofgroup = ofproto->ofproto_class->group_alloc();
-    if (!ofgroup) {
-        VLOG_WARN_RL(&rl, "%s: failed to create group", ofproto->name);
-        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
-    }
-
-    ovs_rwlock_init(&ofgroup->rwlock);
-    ofgroup->ofproto = ofproto;
-    ofgroup->group_id = gm->group_id;
-    ofgroup->type = gm->type;
-    ofgroup->created = ofgroup->modified = time_msec();
-
-    list_move(&ofgroup->buckets, &gm->buckets);
-    ofgroup->n_buckets = list_size(&ofgroup->buckets);
-
-    /* Construct called BEFORE any locks are held. */
-    error = ofproto->ofproto_class->group_construct(ofgroup);
+    error = init_group(ofproto, gm, &ofgroup);
     if (error) {
-        goto free_out;
+        return error;
     }
 
     /* We wrlock as late as possible to minimize the time we jam any other
@@ -5659,76 +5691,67 @@ add_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
  unlock_out:
     ovs_rwlock_unlock(&ofproto->groups_rwlock);
     ofproto->ofproto_class->group_destruct(ofgroup);
- free_out:
     ofputil_bucket_list_destroy(&ofgroup->buckets);
     ofproto->ofproto_class->group_dealloc(ofgroup);
 
     return error;
 }
 
+static void
+replace_group(struct ofproto *ofproto, struct ofgroup *ofgroup,
+              struct ofgroup *new_ofgroup)
+{
+    hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
+    hmap_insert(&ofproto->groups, &new_ofgroup->hmap_node,
+                hash_int(new_ofgroup->group_id, 0));
+    if (ofgroup->type != new_ofgroup->type) {
+        ofproto->n_groups[ofgroup->type]--;
+        ofproto->n_groups[new_ofgroup->type]++;
+    }
+    ofproto_group_unref(ofgroup);
+}
+
 /* Implements OFPFC_MODIFY. Returns 0 on success or an OpenFlow error code on
  * failure.
  *
+ * Note that the group is re-created and then replaces the old group in
+ * ofproto's ofgroup hash map. Thus, the group is never altered while users of
+ * the xlate module hold a pointer to the group.
+ *
  * 'ofconn' is used to retrieve the packet buffer specified in fm->buffer_id,
  * if any. */
 static enum ofperr
 modify_group(struct ofproto *ofproto, struct ofputil_group_mod *gm)
 {
-    struct ofgroup *ofgroup;
-    struct ofgroup *victim;
+    struct ofgroup *ofgroup, *new_ofgroup;
     enum ofperr error;
 
-    if (gm->group_id > OFPG_MAX) {
-        return OFPERR_OFPGMFC_INVALID_GROUP;
-    }
-
-    if (gm->type > OFPGT11_FF) {
-        return OFPERR_OFPGMFC_BAD_TYPE;
-    }
-
-    victim = ofproto->ofproto_class->group_alloc();
-    if (!victim) {
-        VLOG_WARN_RL(&rl, "%s: failed to allocate group", ofproto->name);
-        return OFPERR_OFPGMFC_OUT_OF_GROUPS;
+    error = init_group(ofproto, gm, &new_ofgroup);
+    if (error) {
+        return error;
     }
 
     if (!ofproto_group_write_lookup(ofproto, gm->group_id, &ofgroup)) {
         error = OFPERR_OFPGMFC_UNKNOWN_GROUP;
-        goto free_out;
+        ofproto_group_unref(new_ofgroup);
+        return error;
     }
 
-    /* Both group's and its container's write locks held now.
-     * Also, n_groups[] is protected by ofproto->groups_rwlock. */
+
+    /* Ofproto's group write lock is held now. */
     if (ofgroup->type != gm->type
         && ofproto->n_groups[gm->type] >= ofproto->ogf.max_groups[gm->type]) {
         error = OFPERR_OFPGMFC_OUT_OF_GROUPS;
-        goto unlock_out;
-    }
-
-    *victim = *ofgroup;
-    list_move(&victim->buckets, &ofgroup->buckets);
-
-    ofgroup->type = gm->type;
-    list_move(&ofgroup->buckets, &gm->buckets);
-    ofgroup->n_buckets = list_size(&ofgroup->buckets);
-
-    error = ofproto->ofproto_class->group_modify(ofgroup, victim);
-    if (!error) {
-        ofputil_bucket_list_destroy(&victim->buckets);
-        ofproto->n_groups[victim->type]--;
-        ofproto->n_groups[ofgroup->type]++;
-        ofgroup->modified = time_msec();
+        ofproto_group_unref(new_ofgroup);
     } else {
-        ofputil_bucket_list_destroy(&ofgroup->buckets);
+        /* The group creation time does not change during modification. */
+        new_ofgroup->created = ofgroup->created;
 
-        *ofgroup = *victim;
-        list_move(&ofgroup->buckets, &victim->buckets);
+        replace_group(ofproto, ofgroup, new_ofgroup);
+        error = ofproto->ofproto_class->group_modify(new_ofgroup);
     }
 
- unlock_out:
-    ovs_rwlock_unlock(&ofgroup->rwlock);
     ovs_rwlock_unlock(&ofproto->groups_rwlock);
- free_out:
-    ofproto->ofproto_class->group_dealloc(victim);
+    ofproto_group_unref(ofgroup);
 
     return error;
 }
 
@@ -5745,19 +5768,11 @@ delete_group__(struct ofproto *ofproto, struct ofgroup *ofgroup)
     fm.out_group = ofgroup->group_id;
     handle_flow_mod__(ofproto, NULL, &fm, NULL);
 
-    /* Must wait until existing readers are done,
-     * while holding the container's write lock at the same time. */
-    ovs_rwlock_wrlock(&ofgroup->rwlock);
     hmap_remove(&ofproto->groups, &ofgroup->hmap_node);
     /* No-one can find this group any more. */
     ofproto->n_groups[ofgroup->type]--;
     ovs_rwlock_unlock(&ofproto->groups_rwlock);
-
-    ofproto->ofproto_class->group_destruct(ofgroup);
-    ofputil_bucket_list_destroy(&ofgroup->buckets);
-    ovs_rwlock_unlock(&ofgroup->rwlock);
-    ovs_rwlock_destroy(&ofgroup->rwlock);
-    ofproto->ofproto_class->group_dealloc(ofgroup);
+    ofproto_group_unref(ofgroup);
 }
 
 /* Implements OFPGC_DELETE. */
-- 
1.7.9.5