This patch uses a read-write lock to prevent rules from being evicted
while they're used by child threads.

Signed-off-by: Ethan Jackson <et...@nicira.com>
---
 ofproto/ofproto-dpif-xlate.c |   79 ++++++++++++------------------
 ofproto/ofproto-dpif.c       |  111 ++++++++++++++++++++++++++----------------
 ofproto/ofproto-dpif.h       |   14 ++++--
 ofproto/ofproto-provider.h   |   13 ++++-
 ofproto/ofproto.c            |   89 ++++++++++++++++++++++-----------
 5 files changed, 182 insertions(+), 124 deletions(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index ef45b54..a258f24 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -1585,34 +1585,6 @@ compose_output_action(struct xlate_ctx *ctx, ofp_port_t 
ofp_port)
     compose_output_action__(ctx, ofp_port, true);
 }
 
-/* Common rule processing in one place to avoid duplicating code. */
-static struct rule_dpif *
-ctx_rule_hooks(struct xlate_ctx *ctx, struct rule_dpif *rule,
-               bool may_packet_in)
-{
-    if (ctx->xin->resubmit_hook) {
-        ctx->xin->resubmit_hook(ctx->xin, rule, ctx->recurse);
-    }
-    if (rule == NULL && may_packet_in) {
-        struct xport *xport;
-
-        /* XXX
-         * check if table configuration flags
-         * OFPTC_TABLE_MISS_CONTROLLER, default.
-         * OFPTC_TABLE_MISS_CONTINUE,
-         * OFPTC_TABLE_MISS_DROP
-         * When OF1.0, OFPTC_TABLE_MISS_CONTINUE is used. What to do? */
-        xport = get_ofp_port(ctx->xbridge, ctx->xin->flow.in_port.ofp_port);
-        rule = choose_miss_rule(xport ? xport->config : 0,
-                                ctx->xbridge->miss_rule,
-                                ctx->xbridge->no_packet_in_rule);
-    }
-    if (rule && ctx->xin->resubmit_stats) {
-        rule_credit_stats(rule, ctx->xin->resubmit_stats);
-    }
-    return rule;
-}
-
 static void
 xlate_table_action(struct xlate_ctx *ctx,
                    ofp_port_t in_port, uint8_t table_id, bool may_packet_in)
@@ -1626,15 +1598,39 @@ xlate_table_action(struct xlate_ctx *ctx,
 
         /* Look up a flow with 'in_port' as the input port. */
         ctx->xin->flow.in_port.ofp_port = in_port;
-        rule = rule_dpif_lookup_in_table(ctx->xbridge->ofproto,
-                                         &ctx->xin->flow, &ctx->xout->wc,
-                                         table_id);
+        rule_dpif_lookup_in_table(ctx->xbridge->ofproto, &ctx->xin->flow,
+                                  &ctx->xout->wc, table_id, &rule);
 
         /* Restore the original input port.  Otherwise OFPP_NORMAL and
          * OFPP_IN_PORT will have surprising behavior. */
         ctx->xin->flow.in_port.ofp_port = old_in_port;
 
-        rule = ctx_rule_hooks(ctx, rule, may_packet_in);
+        if (ctx->xin->resubmit_hook) {
+            ctx->xin->resubmit_hook(ctx->xin, rule, ctx->recurse);
+        }
+
+        if (rule == NULL && may_packet_in) {
+            struct xport *xport;
+
+            /* Makes clang's thread safety analysis happy. */
+            rule_release(rule);
+
+            /* XXX
+             * check if table configuration flags
+             * OFPTC_TABLE_MISS_CONTROLLER, default.
+             * OFPTC_TABLE_MISS_CONTINUE,
+             * OFPTC_TABLE_MISS_DROP
+             * When OF1.0, OFPTC_TABLE_MISS_CONTINUE is used. What to do? */
+            xport = get_ofp_port(ctx->xbridge, 
ctx->xin->flow.in_port.ofp_port);
+            rule = choose_miss_rule(xport ? xport->config : 0,
+                                    ctx->xbridge->miss_rule,
+                                    ctx->xbridge->no_packet_in_rule);
+            ovs_rwlock_rdlock(&rule->up.evict);
+        }
+
+        if (rule && ctx->xin->resubmit_stats) {
+            rule_credit_stats(rule, ctx->xin->resubmit_stats);
+        }
 
         if (rule) {
             struct rule_dpif *old_rule = ctx->rule;
@@ -1645,6 +1641,7 @@ xlate_table_action(struct xlate_ctx *ctx,
             ctx->rule = old_rule;
             ctx->recurse--;
         }
+        rule_release(rule);
 
         ctx->table_id = old_table_id;
     } else {
@@ -2123,15 +2120,8 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t 
ofpacts_len,
 {
     struct flow_wildcards *wc = &ctx->xout->wc;
     struct flow *flow = &ctx->xin->flow;
-    bool was_evictable = true;
     const struct ofpact *a;
 
-    if (ctx->rule) {
-        /* Don't let the rule we're working on get evicted underneath us. */
-        was_evictable = ctx->rule->up.evictable;
-        ctx->rule->up.evictable = false;
-    }
-
     OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) {
         struct ofpact_controller *controller;
         const struct ofpact_metadata *metadata;
@@ -2277,20 +2267,20 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t 
ofpacts_len,
         case OFPACT_SET_MPLS_TTL:
             if (compose_set_mpls_ttl_action(ctx,
                                             ofpact_get_SET_MPLS_TTL(a)->ttl)) {
-                goto out;
+                return;
             }
             break;
 
         case OFPACT_DEC_MPLS_TTL:
             if (compose_dec_mpls_ttl_action(ctx)) {
-                goto out;
+                return;
             }
             break;
 
         case OFPACT_DEC_TTL:
             wc->masks.nw_ttl = 0xff;
             if (compose_dec_ttl(ctx, ofpact_get_DEC_TTL(a))) {
-                goto out;
+                return;
             }
             break;
 
@@ -2357,11 +2347,6 @@ do_xlate_actions(const struct ofpact *ofpacts, size_t 
ofpacts_len,
             break;
         }
     }
-
-out:
-    if (ctx->rule) {
-        ctx->rule->up.evictable = was_evictable;
-    }
 }
 
 void
diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 10240fd..f4f46e5 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -82,10 +82,6 @@ BUILD_ASSERT_DECL(N_TABLES >= 2 && N_TABLES <= 255);
 struct flow_miss;
 struct facet;
 
-static struct rule_dpif *rule_dpif_lookup(struct ofproto_dpif *,
-                                          const struct flow *,
-                                          struct flow_wildcards *wc);
-
 static void rule_get_stats(struct rule *, uint64_t *packets, uint64_t *bytes);
 
 struct ofbundle {
@@ -1381,9 +1377,12 @@ add_internal_flow(struct ofproto_dpif *ofproto, int id,
         return error;
     }
 
-    *rulep = rule_dpif_lookup_in_table(ofproto, &fm.match.flow, NULL,
-                                       TBL_INTERNAL);
-    ovs_assert(*rulep != NULL);
+    if (rule_dpif_lookup_in_table(ofproto, &fm.match.flow, NULL, TBL_INTERNAL,
+                                  rulep)) {
+        ovs_rwlock_unlock(&(*rulep)->up.evict);
+    } else {
+        NOT_REACHED();
+    }
 
     return 0;
 }
@@ -3488,9 +3487,10 @@ handle_flow_miss_with_facet(struct flow_miss *miss, 
struct facet *facet,
             struct rule_dpif *rule;
             struct xlate_in xin;
 
-            rule = rule_dpif_lookup(facet->ofproto, &facet->flow, NULL);
+            rule_dpif_lookup(facet->ofproto, &facet->flow, NULL, &rule);
             xlate_in_init(&xin, facet->ofproto, &miss->flow, rule, 0, packet);
             xlate_actions_for_side_effects(&xin);
+            rule_release(rule);
         }
 
         if (facet->xout.odp_actions.size) {
@@ -3587,7 +3587,7 @@ handle_flow_miss(struct flow_miss *miss, struct 
flow_miss_op *ops,
         struct xlate_in xin;
 
         flow_wildcards_init_catchall(&wc);
-        rule = rule_dpif_lookup(ofproto, &miss->flow, &wc);
+        rule_dpif_lookup(ofproto, &miss->flow, &wc, &rule);
         rule_credit_stats(rule, stats);
 
         xlate_in_init(&xin, ofproto, &miss->flow, rule, stats->tcp_flags,
@@ -3605,10 +3605,12 @@ handle_flow_miss(struct flow_miss *miss, struct 
flow_miss_op *ops,
         if (miss->key_fitness == ODP_FIT_TOO_LITTLE
             || !flow_miss_should_make_facet(miss, &xout.wc)) {
             handle_flow_miss_without_facet(rule, &xout, miss, ops, n_ops);
+            rule_release(rule);
             return;
         }
 
         facet = facet_create(miss, rule, &xout, stats);
+        rule_release(rule);
         stats = NULL;
     }
     handle_flow_miss_with_facet(miss, facet, now, stats, ops, n_ops);
@@ -4324,10 +4326,12 @@ rule_expire(struct rule_dpif *rule)
         return;
     }
 
-    COVERAGE_INC(ofproto_dpif_expired);
+    if (!ovs_rwlock_trywrlock(&rule->up.evict)) {
+        COVERAGE_INC(ofproto_dpif_expired);
 
-    /* Get rid of the rule. */
-    ofproto_rule_expire(&rule->up, reason);
+        /* Get rid of the rule. */
+        ofproto_rule_expire(&rule->up, reason);
+    }
 }
 
 /* Facets. */
@@ -4524,16 +4528,19 @@ facet_is_controller_flow(struct facet *facet)
 {
     if (facet) {
         struct ofproto_dpif *ofproto = facet->ofproto;
-        const struct rule_dpif *rule = rule_dpif_lookup(ofproto, &facet->flow,
-                                                        NULL);
-        const struct ofpact *ofpacts = rule->up.ofpacts;
-        size_t ofpacts_len = rule->up.ofpacts_len;
-
-        if (ofpacts_len > 0 &&
-            ofpacts->type == OFPACT_CONTROLLER &&
-            ofpact_next(ofpacts) >= ofpact_end(ofpacts, ofpacts_len)) {
-            return true;
-        }
+        const struct ofpact *ofpacts;
+        struct rule_dpif *rule;
+        size_t ofpacts_len;
+        bool is_controller;
+
+        rule_dpif_lookup(ofproto, &facet->flow, NULL, &rule);
+        ofpacts_len = rule->up.ofpacts_len;
+        ofpacts = rule->up.ofpacts;
+        is_controller = ofpacts_len > 0
+            && ofpacts->type == OFPACT_CONTROLLER
+            && ofpact_next(ofpacts) >= ofpact_end(ofpacts, ofpacts_len);
+        rule_release(rule);
+        return is_controller;
     }
     return false;
 }
@@ -4623,9 +4630,10 @@ facet_check_consistency(struct facet *facet)
     bool ok, fail_open;
 
     /* Check the datapath actions for consistency. */
-    rule = rule_dpif_lookup(facet->ofproto, &facet->flow, NULL);
+    rule_dpif_lookup(facet->ofproto, &facet->flow, NULL, &rule);
     xlate_in_init(&xin, facet->ofproto, &facet->flow, rule, 0, NULL);
     xlate_actions(&xin, &xout);
+    rule_release(rule);
 
     fail_open = rule->up.cr.priority == FAIL_OPEN_PRIORITY;
     ok = ofpbuf_equal(&facet->xout.odp_actions, &xout.odp_actions)
@@ -4706,7 +4714,7 @@ facet_revalidate(struct facet *facet)
     }
 
     flow_wildcards_init_catchall(&wc);
-    new_rule = rule_dpif_lookup(ofproto, &facet->flow, &wc);
+    rule_dpif_lookup(ofproto, &facet->flow, &wc, &new_rule);
 
     /* Calculate new datapath actions.
      *
@@ -4729,6 +4737,7 @@ facet_revalidate(struct facet *facet)
         || memcmp(&facet->xout.wc, &xout.wc, sizeof xout.wc)) {
         facet_remove(facet);
         xlate_out_uninit(&xout);
+        rule_release(new_rule);
         return false;
     }
 
@@ -4761,6 +4770,7 @@ facet_revalidate(struct facet *facet)
     facet->fail_open = new_rule->up.cr.priority == FAIL_OPEN_PRIORITY;
 
     xlate_out_uninit(&xout);
+    rule_release(new_rule);
     return true;
 }
 
@@ -4803,7 +4813,7 @@ facet_push_stats(struct facet *facet, bool may_learn)
             netdev_vport_inc_rx(in_port->up.netdev, &stats);
         }
 
-        rule = rule_dpif_lookup(ofproto, &facet->flow, NULL);
+        rule_dpif_lookup(ofproto, &facet->flow, NULL, &rule);
         rule_credit_stats(rule, &stats);
         netflow_flow_update_time(ofproto->netflow, &facet->nf_flow,
                                  facet->used);
@@ -4816,6 +4826,7 @@ facet_push_stats(struct facet *facet, bool may_learn)
         xin.resubmit_stats = &stats;
         xin.may_learn = may_learn;
         xlate_actions_for_side_effects(&xin);
+        rule_release(rule);
     }
 }
 
@@ -5114,16 +5125,14 @@ subfacet_update_stats(struct subfacet *subfacet,
 
 /* Lookup 'flow' in 'ofproto''s classifier.  If 'wc' is non-null, sets
  * the fields that were relevant as part of the lookup. */
-static struct rule_dpif *
+void
 rule_dpif_lookup(struct ofproto_dpif *ofproto, const struct flow *flow,
-                 struct flow_wildcards *wc)
+                 struct flow_wildcards *wc, struct rule_dpif **rule)
 {
     struct ofport_dpif *port;
-    struct rule_dpif *rule;
 
-    rule = rule_dpif_lookup_in_table(ofproto, flow, wc, 0);
-    if (rule) {
-        return rule;
+    if (rule_dpif_lookup_in_table(ofproto, flow, wc, 0, rule)) {
+        return;
     }
     port = get_ofp_port(ofproto, flow->in_port.ofp_port);
     if (!port) {
@@ -5131,21 +5140,23 @@ rule_dpif_lookup(struct ofproto_dpif *ofproto, const 
struct flow *flow,
                      flow->in_port.ofp_port);
     }
 
-    return choose_miss_rule(port ? port->up.pp.config : 0, ofproto->miss_rule,
-                            ofproto->no_packet_in_rule);
+    *rule = choose_miss_rule(port ? port->up.pp.config : 0, ofproto->miss_rule,
+                             ofproto->no_packet_in_rule);
+    ovs_rwlock_rdlock(&(*rule)->up.evict);
 }
 
-struct rule_dpif *
+bool
 rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto,
                           const struct flow *flow, struct flow_wildcards *wc,
-                          uint8_t table_id)
+                          uint8_t table_id, struct rule_dpif **rule)
 {
     struct cls_rule *cls_rule;
     struct classifier *cls;
     bool frag;
 
+    *rule = NULL;
     if (table_id >= N_TABLES) {
-        return NULL;
+        return false;
     }
 
     if (wc) {
@@ -5154,26 +5165,32 @@ rule_dpif_lookup_in_table(struct ofproto_dpif *ofproto,
     }
 
     cls = &ofproto->up.tables[table_id].cls;
+    ovs_rwlock_rdlock(&cls->rwlock);
     frag = (flow->nw_frag & FLOW_NW_FRAG_ANY) != 0;
     if (frag && ofproto->up.frag_handling == OFPC_FRAG_NORMAL) {
         /* We must pretend that transport ports are unavailable. */
         struct flow ofpc_normal_flow = *flow;
         ofpc_normal_flow.tp_src = htons(0);
         ofpc_normal_flow.tp_dst = htons(0);
-        ovs_rwlock_rdlock(&cls->rwlock);
         cls_rule = classifier_lookup(cls, &ofpc_normal_flow, wc);
-        ovs_rwlock_unlock(&cls->rwlock);
     } else if (frag && ofproto->up.frag_handling == OFPC_FRAG_DROP) {
         cls_rule = &ofproto->drop_frags_rule->up.cr;
         if (wc) {
             flow_wildcards_init_exact(wc);
         }
     } else {
-        ovs_rwlock_rdlock(&cls->rwlock);
         cls_rule = classifier_lookup(cls, flow, wc);
-        ovs_rwlock_unlock(&cls->rwlock);
     }
-    return rule_dpif_cast(rule_from_cls_rule(cls_rule));
+
+    *rule = rule_dpif_cast(rule_from_cls_rule(cls_rule));
+    if (*rule && ovs_rwlock_tryrdlock(&(*rule)->up.evict)) {
+        /* The rule is in the process of being removed.  Best we can do is
+         * pretend it isn't there. */
+        *rule = NULL;
+    }
+    ovs_rwlock_unlock(&cls->rwlock);
+
+    return *rule != NULL;
 }
 
 /* Given a port configuration (specificied as zero if there's no port), chooses
@@ -5186,6 +5203,14 @@ choose_miss_rule(enum ofputil_port_config config, struct 
rule_dpif *miss_rule,
     return config & OFPUTIL_PC_NO_PACKET_IN ? no_packet_in_rule : miss_rule;
 }
 
+void
+rule_release(struct rule_dpif *rule)
+{
+    if (rule) {
+        ovs_rwlock_unlock(&rule->up.evict);
+    }
+}
+
 static void
 complete_operation(struct rule_dpif *rule)
 {
@@ -5803,7 +5828,7 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct 
flow *flow,
     flow_format(ds, flow);
     ds_put_char(ds, '\n');
 
-    rule = rule_dpif_lookup(ofproto, flow, NULL);
+    rule_dpif_lookup(ofproto, flow, NULL, &rule);
 
     trace_format_rule(ds, 0, rule);
     if (rule == ofproto->miss_rule) {
@@ -5873,6 +5898,8 @@ ofproto_trace(struct ofproto_dpif *ofproto, const struct 
flow *flow,
 
         xlate_out_uninit(&trace.xout);
     }
+
+    rule_release(rule);
 }
 
 static void
diff --git a/ofproto/ofproto-dpif.h b/ofproto/ofproto-dpif.h
index a74146b..30ba566 100644
--- a/ofproto/ofproto-dpif.h
+++ b/ofproto/ofproto-dpif.h
@@ -55,10 +55,16 @@ static inline struct rule_dpif *rule_dpif_cast(const struct 
rule *rule)
     return rule ? CONTAINER_OF(rule, struct rule_dpif, up) : NULL;
 }
 
-struct rule_dpif *rule_dpif_lookup_in_table(struct ofproto_dpif *,
-                                            const struct flow *,
-                                            struct flow_wildcards *,
-                                            uint8_t table_id);
+void rule_dpif_lookup(struct ofproto_dpif *, const struct flow *,
+                      struct flow_wildcards *, struct rule_dpif **rule)
+    OVS_ACQ_RDLOCK((*rule)->up.evict);
+
+bool rule_dpif_lookup_in_table(struct ofproto_dpif *, const struct flow *,
+                               struct flow_wildcards *, uint8_t table_id,
+                               struct rule_dpif **rule)
+    OVS_ACQ_RDLOCK((*rule)->up.evict);
+
+void rule_release(struct rule_dpif *rule) OVS_RELEASES(rule->up.evict);
 
 void rule_credit_stats(struct rule_dpif *, const struct dpif_flow_stats *);
 
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index 3ac9aaa..2d33221 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -229,10 +229,18 @@ struct rule {
     uint16_t idle_timeout OVS_GUARDED; /* In seconds from ->used. */
 
     /* Eviction groups. */
-    bool evictable;              /* If false, prevents eviction. */
     struct heap_node evg_node;   /* In eviction_group's "rules" heap. */
     struct eviction_group *eviction_group; /* NULL if not in any group. */
 
+    /* The evict lock is used to prevent rules from being evicted while child
+     * threads are using them to xlate flows.  A read lock means the rule is
+     * currently being used.  A write lock means the rule is in the process of
+     * being evicted and should be considered gone.  A rule will not be evicted
+     * unless both its own and its classifiers write locks are held.
+     * Therefore, while holding a classifier readlock, one can be assured that
+     * even write locked rules are safe. */
+    struct ovs_rwlock evict;
+
     struct ofpact *ofpacts;      /* Sequence of "struct ofpacts". */
     unsigned int ofpacts_len;    /* Size of 'ofpacts', in bytes. */
 
@@ -264,7 +272,8 @@ rule_from_cls_rule(const struct cls_rule *cls_rule)
 }
 
 void ofproto_rule_update_used(struct rule *, long long int used);
-void ofproto_rule_expire(struct rule *, uint8_t reason);
+void ofproto_rule_expire(struct rule *rule, uint8_t reason)
+    OVS_RELEASES(rule->evict);
 void ofproto_rule_destroy(struct rule *);
 
 bool ofproto_rule_has_out_port(const struct rule *, ofp_port_t out_port);
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index 82089b7..a24f978 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -152,7 +152,7 @@ static void oftable_enable_eviction(struct oftable *,
                                     const struct mf_subfield *fields,
                                     size_t n_fields);
 
-static void oftable_remove_rule(struct rule *);
+static void oftable_remove_rule(struct rule *rule) OVS_RELEASES(rule->evict);
 static struct rule *oftable_replace_rule(struct rule *);
 static void oftable_substitute_rule(struct rule *old, struct rule *new);
 
@@ -178,7 +178,8 @@ struct eviction_group {
     struct heap rules;          /* Contains "struct rule"s. */
 };
 
-static struct rule *choose_rule_to_evict(struct oftable *);
+static bool choose_rule_to_evict(struct oftable *table, struct rule **rulep)
+    OVS_TRY_WRLOCK(true, (*rulep)->evict);
 static void ofproto_evict(struct ofproto *);
 static uint32_t rule_eviction_priority(struct rule *);
 
@@ -199,8 +200,9 @@ static bool rule_is_modifiable(const struct rule *);
 static enum ofperr add_flow(struct ofproto *, struct ofconn *,
                             struct ofputil_flow_mod *,
                             const struct ofp_header *);
-static void delete_flow__(struct rule *, struct ofopgroup *,
-                          enum ofp_flow_removed_reason);
+static void delete_flow__(struct rule *rule, struct ofopgroup *,
+                          enum ofp_flow_removed_reason)
+    OVS_RELEASES(rule->evict);
 static bool handle_openflow(struct ofconn *, const struct ofpbuf *);
 static enum ofperr handle_flow_mod__(struct ofproto *, struct ofconn *,
                                      struct ofputil_flow_mod *,
@@ -1071,6 +1073,13 @@ ofproto_flush__(struct ofproto *ofproto)
         cls_cursor_init(&cursor, &table->cls, NULL);
         CLS_CURSOR_FOR_EACH_SAFE (rule, next_rule, cr, &cursor) {
             if (!rule->pending) {
+                continue;
+            }
+
+            /* Clang isn't smart enough to handle the trywrylock thread safety
+             * analysis if it shares the if statement with the above
+             * rule->pending check. */
+            if (!ovs_rwlock_trywrlock(&rule->evict)) {
                 ofoperation_create(group, rule, OFOPERATION_DELETE,
                                    OFPRR_DELETE);
                 oftable_remove_rule(rule);
@@ -1669,6 +1678,10 @@ ofproto_delete_flow(struct ofproto *ofproto,
         /* An operation on the rule is already pending -> failure.
          * Caller must retry later if it's important. */
         return false;
+    } else if (ovs_rwlock_trywrlock(&rule->evict)) {
+        /* Someone's currently using this rule.  Caller must retry later if
+         * it's important. */
+        return false;
     } else {
         /* Initiate deletion -> success. */
         struct ofopgroup *group = ofopgroup_create_unattached(ofproto);
@@ -2192,7 +2205,11 @@ void
 ofproto_rule_destroy(struct rule *rule)
 {
     ovs_assert(!rule->pending);
-    oftable_remove_rule(rule);
+    if (!ovs_rwlock_trywrlock(&rule->evict)) {
+        oftable_remove_rule(rule);
+    } else {
+        NOT_REACHED();
+    }
     ofproto_rule_destroy__(rule);
 }
 
@@ -3412,12 +3429,12 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
     rule->ofpacts_len = fm->ofpacts_len;
     rule->meter_id = find_meter(rule->ofpacts, rule->ofpacts_len);
     list_init(&rule->meter_list_node);
-    rule->evictable = true;
     rule->eviction_group = NULL;
     list_init(&rule->expirable);
     rule->monitor_flags = 0;
     rule->add_seqno = 0;
     rule->modify_seqno = 0;
+    ovs_rwlock_init(&rule->evict);
 
     /* Insert new rule. */
     victim = oftable_replace_rule(rule);
@@ -3434,19 +3451,18 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
         n_rules = classifier_count(&table->cls);
         ovs_rwlock_unlock(&table->cls.rwlock);
         if (n_rules > table->max_flows) {
-            bool was_evictable;
-
-            was_evictable = rule->evictable;
-            rule->evictable = false;
-            evict = choose_rule_to_evict(table);
-            rule->evictable = was_evictable;
-
-            if (!evict) {
+            ovs_rwlock_rdlock(&rule->evict);
+            if (choose_rule_to_evict(table, &evict)) {
+                ovs_rwlock_unlock(&rule->evict);
+                ovs_rwlock_unlock(&evict->evict);
+                if (evict->pending) {
+                    error = OFPROTO_POSTPONE;
+                    goto exit;
+                }
+            } else {
+                ovs_rwlock_unlock(&rule->evict);
                 error = OFPERR_OFPFMFC_TABLE_FULL;
                 goto exit;
-            } else if (evict->pending) {
-                error = OFPROTO_POSTPONE;
-                goto exit;
             }
         } else {
             evict = NULL;
@@ -3461,6 +3477,13 @@ add_flow(struct ofproto *ofproto, struct ofconn *ofconn,
             op->group->n_running--;
             ofoperation_destroy(rule->pending);
         } else if (evict) {
+            /* It would be better if we maintained the lock we took in
+             * choose_rule_to_evict() earlier, but that confuses the thread
+             * safety analysis, and this code is fragile enough that we really
+             * need it.  In the worst case, we'll have to block a little while
+             * before we perform the eviction, which doesn't seem like a big
+             * problem. */
+            ovs_rwlock_wrlock(&evict->evict)
             delete_flow__(evict, group, OFPRR_EVICTION);
         }
         ofopgroup_submit(group);
@@ -3631,6 +3654,7 @@ delete_flows__(struct ofproto *ofproto, struct ofconn 
*ofconn,
 
     group = ofopgroup_create(ofproto, ofconn, request, UINT32_MAX);
     LIST_FOR_EACH_SAFE (rule, next, ofproto_node, rules) {
+        ovs_rwlock_wrlock(&rule->evict);
         delete_flow__(rule, group, reason);
     }
     ofopgroup_submit(group);
@@ -5052,17 +5076,18 @@ pick_fallback_dpid(void)
 
 /* Table overflow policy. */
 
-/* Chooses and returns a rule to evict from 'table'.  Returns NULL if the table
- * is not configured to evict rules or if the table contains no evictable
- * rules.  (Rules with 'evictable' set to false or with no timeouts are not
- * evictable.) */
-static struct rule *
-choose_rule_to_evict(struct oftable *table)
+/* Chooses and updates 'rulep' with a rule to evict from 'table'.  Sets 'rulep'
+ * to NULL if the table is not configured to evict rules or if the table
+ * contains no evictable rules.  (Rules with a readlock on their evict rwlock,
+ * or with no timeouts are not evictable.) */
+static bool
+choose_rule_to_evict(struct oftable *table, struct rule **rulep)
 {
     struct eviction_group *evg;
 
+    *rulep = NULL;
     if (!table->eviction_fields) {
-        return NULL;
+        return false;
     }
 
     /* In the common case, the outer and inner loops here will each be entered
@@ -5081,13 +5106,14 @@ choose_rule_to_evict(struct oftable *table)
         struct rule *rule;
 
         HEAP_FOR_EACH (rule, evg_node, &evg->rules) {
-            if (rule->evictable) {
-                return rule;
+            if (!ovs_rwlock_trywrlock(&rule->evict)) {
+                *rulep = rule;
+                return true;
             }
         }
     }
 
-    return NULL;
+    return false;
 }
 
 /* Searches 'ofproto' for tables that have more flows than their configured
@@ -5116,8 +5142,12 @@ ofproto_evict(struct ofproto *ofproto)
                 break;
             }
 
-            rule = choose_rule_to_evict(table);
-            if (!rule || rule->pending) {
+            if (!choose_rule_to_evict(table, &rule)) {
+                break;
+            }
+
+            if (rule->pending) {
+                ovs_rwlock_unlock(&rule->evict);
                 break;
             }
 
@@ -5496,6 +5526,7 @@ oftable_substitute_rule(struct rule *old, struct rule 
*new)
     if (new) {
         oftable_replace_rule(new);
     } else {
+        ovs_rwlock_wrlock(&old->evict);
         oftable_remove_rule(old);
     }
 }
-- 
1.7.9.5

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to