Hello Ilya, Could you review the patch?
Thanks Eiichi > On Apr 30, 2020, at 14:16, Eiichi Tsukata <[email protected]> wrote: > > Currently classifier tries and n_tries can be updated not atomically, > there is a race condition which can lead to NULL dereference. > The race can happen when main thread updates a classifier tries and > n_tries in classifier_set_prefix_fields() and at the same time revalidator > or handler thread try to lookup them in classifier_lookup__(). Such race > can be triggered when user changes prefixes of flow_table. > > Race(user changes flow_table prefixes: ip_dst,ip_src => none): > > [main thread] [revalidator/handler thread] > =========================================================== > /* cls->n_tries == 2 */ > for (int i = 0; i < cls->n_tries; i++) { > trie_init(cls, i, NULL); > /* n_tries == 0 */ > cls->n_tries = n_tries; > /* cls->tries[i]->feild is NULL */ > trie_ctx_init(&trie_ctx[i],&cls->tries[i]); > /* trie->field is NULL */ > ctx->be32ofs = trie->field->flow_be32ofs; > > To prevent the race, instead of re-introducing internal mutex > implemented in the commit fccd7c092e09 ("classifier: Remove internal > mutex."), this patch makes trie field RCU protected and checks it after > read. > > Fixes: fccd7c092e09 ("classifier: Remove internal mutex.") > Signed-off-by: Eiichi Tsukata <[email protected]> > --- > lib/classifier.c | 37 ++++++++++++++++++++++--------------- > lib/classifier.h | 6 ++++-- > tests/test-classifier.c | 5 +++-- > 3 files changed, 29 insertions(+), 19 deletions(-) > > diff --git a/lib/classifier.c b/lib/classifier.c > index 0fad953..10909a6 100644 > --- a/lib/classifier.c > +++ b/lib/classifier.c > @@ -393,7 +393,9 @@ classifier_set_prefix_fields(struct classifier *cls, > bitmap_set1(fields.bm, trie_fields[i]); > > new_fields[n_tries] = NULL; > - if (n_tries >= cls->n_tries || field != cls->tries[n_tries].field) { > + const struct mf_field *cls_field > + = ovsrcu_get(struct mf_field *, &cls->tries[n_tries].field); > + if (n_tries >= cls->n_tries || field != cls_field) { > new_fields[n_tries] = field; > changed = true; > } > @@ -454,7 +456,7 @@ trie_init(struct classifier *cls, int trie_idx, const > struct mf_field *field) > } else { > ovsrcu_set_hidden(&trie->root, NULL); > } > - trie->field = field; > + ovsrcu_set_hidden(&trie->field, field); > > /* Add existing rules to the new trie. */ > CMAP_FOR_EACH (subtable, cmap_node, &cls->subtables_map) { > @@ -839,7 +841,6 @@ classifier_remove_assert(struct classifier *cls, > struct trie_ctx { > const struct cls_trie *trie; > bool lookup_done; /* Status of the lookup. */ > - uint8_t be32ofs; /* U32 offset of the field in question. */ > unsigned int maskbits; /* Prefix length needed to avoid false matches. > */ > union trie_prefix match_plens; /* Bitmask of prefix lengths with possible > * matches. */ > @@ -849,7 +850,6 @@ static void > trie_ctx_init(struct trie_ctx *ctx, const struct cls_trie *trie) > { > ctx->trie = trie; > - ctx->be32ofs = trie->field->flow_be32ofs; > ctx->lookup_done = false; > } > > @@ -1531,8 +1531,10 @@ insert_subtable(struct classifier *cls, const struct > minimask *mask) > *CONST_CAST(uint8_t *, &subtable->n_indices) = index; > > for (i = 0; i < cls->n_tries; i++) { > - subtable->trie_plen[i] = minimask_get_prefix_len(mask, > - > cls->tries[i].field); > + const struct mf_field *field > + = ovsrcu_get(struct mf_field *, &cls->tries[i].field); > + subtable->trie_plen[i] > + = field ? minimask_get_prefix_len(mask, field) : 0; > } > > /* Ports trie. */ > @@ -1577,8 +1579,10 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], > unsigned int n_tries, > for (j = 0; j < n_tries; j++) { > /* Is the trie field relevant for this subtable, and > is the trie field within the current range of fields? */ > - if (field_plen[j] && > - flowmap_is_set(&range_map, trie_ctx[j].be32ofs / 2)) { > + const struct mf_field *ctx_field > + = ovsrcu_get(struct mf_field *, &trie_ctx[j].trie->field); > + if (field_plen[j] && ctx_field && > + flowmap_is_set(&range_map, ctx_field->flow_be32ofs / 2)) { > struct trie_ctx *ctx = &trie_ctx[j]; > > /* On-demand trie lookup. */ > @@ -1601,14 +1605,16 @@ check_tries(struct trie_ctx trie_ctx[CLS_MAX_TRIES], > unsigned int n_tries, > * than this subtable would otherwise. */ > if (ctx->maskbits <= field_plen[j]) { > /* Unwildcard the bits and skip the rest. */ > - mask_set_prefix_bits(wc, ctx->be32ofs, ctx->maskbits); > + mask_set_prefix_bits(wc, ctx_field->flow_be32ofs, > + ctx->maskbits); > /* Note: Prerequisite already unwildcarded, as the only > * prerequisite of the supported trie lookup fields is > * the ethertype, which is always unwildcarded. */ > return true; > } > /* Can skip if the field is already unwildcarded. */ > - if (mask_prefix_bits_set(wc, ctx->be32ofs, ctx->maskbits)) { > + if (mask_prefix_bits_set(wc, ctx_field->flow_be32ofs, > + ctx->maskbits)) { > return true; > } > } > @@ -2001,12 +2007,12 @@ static unsigned int > trie_lookup(const struct cls_trie *trie, const struct flow *flow, > union trie_prefix *plens) > { > - const struct mf_field *mf = trie->field; > + const struct mf_field *mf = ovsrcu_get(struct mf_field *, &trie->field); > > /* Check that current flow matches the prerequisites for the trie > * field. Some match fields are used for multiple purposes, so we > * must check that the trie is relevant for this flow. */ > - if (mf_are_prereqs_ok(mf, flow, NULL)) { > + if (mf && mf_are_prereqs_ok(mf, flow, NULL)) { > return trie_lookup_value(&trie->root, > &((ovs_be32 *)flow)[mf->flow_be32ofs], > &plens->be32, mf->n_bits); > @@ -2053,8 +2059,9 @@ minimask_get_prefix_len(const struct minimask *minimask, > * happened to be zeros. > */ > static const ovs_be32 * > -minimatch_get_prefix(const struct minimatch *match, const struct mf_field > *mf) > +minimatch_get_prefix(const struct minimatch *match, rcu_field_ptr *field) > { > + struct mf_field *mf = ovsrcu_get_protected(struct mf_field *, field); > size_t u64_ofs = mf->flow_be32ofs / 2; > > return (OVS_FORCE const ovs_be32 *)miniflow_get__(match->flow, u64_ofs) > @@ -2068,7 +2075,7 @@ static void > trie_insert(struct cls_trie *trie, const struct cls_rule *rule, int mlen) > { > trie_insert_prefix(&trie->root, > - minimatch_get_prefix(&rule->match, trie->field), > mlen); > + minimatch_get_prefix(&rule->match, &trie->field), > mlen); > } > > static void > @@ -2123,7 +2130,7 @@ static void > trie_remove(struct cls_trie *trie, const struct cls_rule *rule, int mlen) > { > trie_remove_prefix(&trie->root, > - minimatch_get_prefix(&rule->match, trie->field), > mlen); > + minimatch_get_prefix(&rule->match, &trie->field), > mlen); > } > > /* 'mlen' must be the (non-zero) CIDR prefix length of the 'trie->field' mask > diff --git a/lib/classifier.h b/lib/classifier.h > index d1bd4aa..f646a8f 100644 > --- a/lib/classifier.h > +++ b/lib/classifier.h > @@ -314,13 +314,15 @@ extern "C" { > struct cls_subtable; > struct cls_match; > > +struct mf_field; > +typedef OVSRCU_TYPE(struct mf_field *) rcu_field_ptr; > struct trie_node; > typedef OVSRCU_TYPE(struct trie_node *) rcu_trie_ptr; > > /* Prefix trie for a 'field' */ > struct cls_trie { > - const struct mf_field *field; /* Trie field, or NULL. */ > - rcu_trie_ptr root; /* NULL if none. */ > + rcu_field_ptr field; /* Trie field, or NULL. */ > + rcu_trie_ptr root; /* NULL if none. */ > }; > > enum { > diff --git a/tests/test-classifier.c b/tests/test-classifier.c > index 6d53d01..2d98fad 100644 > --- a/tests/test-classifier.c > +++ b/tests/test-classifier.c > @@ -512,8 +512,9 @@ verify_tries(struct classifier *cls) > int i; > > for (i = 0; i < cls->n_tries; i++) { > - n_rules += trie_verify(&cls->tries[i].root, 0, > - cls->tries[i].field->n_bits); > + const struct mf_field * cls_field > + = ovsrcu_get(struct mf_field *, &cls->tries[i].field); > + n_rules += trie_verify(&cls->tries[i].root, 0, cls_field->n_bits); > } > assert(n_rules <= cls->n_rules); > } > -- > 1.8.3.1 > _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
