On Thu, Aug 24, 2023 at 6:01 PM Ilya Maximets <[email protected]> wrote:
>
> On 8/22/23 22:45, Mike Pattrick wrote:
> > When the a revalidator thread is updating statistics for an XC_LEARN
> > xcache entry in xlate_push_stats_entry it uses ofproto_flow_mod_learn.
> > The revalidator will update stats for rules even if they are in a
> > removed state or marked as invisible. However, ofproto_flow_mod_learn
> > will detect if a flow has been removed and re-add it in that case. This
> > can result in an old learn action replacing the new learn action that
> > had replaced it in the first place.
> >
> > This change adds a new stats_used parameter to ofproto_flow_mod_learn
> > allowing the caller to provide a timestamp that will be fed into the
> > learned rule's modified time. The provided timestamp should be the time
> > of the last packet activity. If stats_used is not set then the current
> > time is used, as is the current behaviour.
> >
> > This change also adds a check when replacing a learned rule to favour the
> > newest rule.
> >
> > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2213892
> > Signed-off-by: Mike Pattrick <[email protected]>
> > ---
> >  v2: Added additional checks if rule is removed
> >  v3: v2 patch was corrupted in transit
> >  v4: Added check against dpif flow stats
> >  v5: Fixed typos, updated commit message
> >      Changed timestamps to use datapath timestamp more consistently
> > ---
>
> I'll quote my previous review comment:
>
> It should be possible to add a test for this issue using time
> warping.  Please, add one.

I tried to make a test with netdev-dummy/receive and time/warp,
however, it didn't reproduce the behaviour that's observed with a full
test. Do you have any suggestions?


Thank you,
Mike

>
>
>
> >  ofproto/ofproto-dpif-xlate-cache.c |  2 +-
> >  ofproto/ofproto-dpif-xlate.c       |  8 +++-
> >  ofproto/ofproto-dpif.c             |  2 +-
> >  ofproto/ofproto-provider.h         |  6 ++-
> >  ofproto/ofproto.c                  | 65 +++++++++++++++++++++++++-----
> >  5 files changed, 69 insertions(+), 14 deletions(-)
> >
> > diff --git a/ofproto/ofproto-dpif-xlate-cache.c 
> > b/ofproto/ofproto-dpif-xlate-cache.c
> > index 9224ee2e6..2e1fcb3a6 100644
> > --- a/ofproto/ofproto-dpif-xlate-cache.c
> > +++ b/ofproto/ofproto-dpif-xlate-cache.c
> > @@ -125,7 +125,7 @@ xlate_push_stats_entry(struct xc_entry *entry,
> >      case XC_LEARN: {
> >          enum ofperr error;
> >          error = ofproto_flow_mod_learn(entry->learn.ofm, true,
> > -                                       entry->learn.limit, NULL);
> > +                                       entry->learn.limit, NULL, 
> > stats->used);
> >          if (error) {
> >              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
> >              VLOG_WARN_RL(&rl, "xcache LEARN action execution failed.");
> > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> > index 47ea0f47e..bcacb2f0a 100644
> > --- a/ofproto/ofproto-dpif-xlate.c
> > +++ b/ofproto/ofproto-dpif-xlate.c
> > @@ -5700,8 +5700,14 @@ xlate_learn_action(struct xlate_ctx *ctx, const 
> > struct ofpact_learn *learn)
> >          if (!error) {
> >              bool success = true;
> >              if (ctx->xin->allow_side_effects) {
> > +                long long int stats_used = 0;
> > +
> > +                if (ctx->xin->resubmit_stats) {
> > +                    stats_used = ctx->xin->resubmit_stats->used;
> > +                }
> >                  error = ofproto_flow_mod_learn(ofm, ctx->xin->xcache != 
> > NULL,
> > -                                               learn->limit, &success);
> > +                                               learn->limit, &success,
> > +                                               stats_used);
> >              } else if (learn->limit) {
> >                  if (!ofm->temp_rule
> >                      || ofm->temp_rule->state != RULE_INSERTED) {
> > diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> > index e22ca757a..1efd9fc91 100644
> > --- a/ofproto/ofproto-dpif.c
> > +++ b/ofproto/ofproto-dpif.c
> > @@ -4880,7 +4880,7 @@ packet_xlate(struct ofproto *ofproto_, struct 
> > ofproto_packet_out *opo)
> >              if (entry->type == XC_LEARN) {
> >                  struct ofproto_flow_mod *ofm = entry->learn.ofm;
> >
> > -                error = ofproto_flow_mod_learn_refresh(ofm);
> > +                error = ofproto_flow_mod_learn_refresh(ofm, 0);
> >                  if (error) {
> >                      goto error_out;
> >                  }
> > diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> > index 143ded690..dc5c1a60d 100644
> > --- a/ofproto/ofproto-provider.h
> > +++ b/ofproto/ofproto-provider.h
> > @@ -2027,9 +2027,11 @@ enum ofperr ofproto_flow_mod_init_for_learn(struct 
> > ofproto *,
> >                                              struct ofproto_flow_mod *)
> >      OVS_EXCLUDED(ofproto_mutex);
> >  enum ofperr ofproto_flow_mod_learn(struct ofproto_flow_mod *, bool 
> > keep_ref,
> > -                                   unsigned limit, bool *below_limit)
> > +                                   unsigned limit, bool *below_limit,
> > +                                   long long int stats_used)
> >      OVS_EXCLUDED(ofproto_mutex);
> > -enum ofperr ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm);
> > +enum ofperr ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm,
> > +                                           long long int stats_used);
> >  enum ofperr ofproto_flow_mod_learn_start(struct ofproto_flow_mod *ofm)
> >      OVS_REQUIRES(ofproto_mutex);
> >  void ofproto_flow_mod_learn_revert(struct ofproto_flow_mod *ofm)
> > diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> > index dbf4958bc..92fafd9ad 100644
> > --- a/ofproto/ofproto.c
> > +++ b/ofproto/ofproto.c
> > @@ -5472,7 +5472,8 @@ ofproto_flow_mod_init_for_learn(struct ofproto 
> > *ofproto,
> >  }
> >
> >  enum ofperr
> > -ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm)
> > +ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm,
> > +                               long long int stats_used)
> >  {
> >      enum ofperr error = 0;
> >
> > @@ -5494,8 +5495,36 @@ ofproto_flow_mod_learn_refresh(struct 
> > ofproto_flow_mod *ofm)
> >      if (rule->state == RULE_REMOVED) {
> >          struct cls_rule cr;
> >
> > -        cls_rule_clone(&cr, &rule->cr);
> >          ovs_mutex_lock(&rule->mutex);
> > +        if (stats_used) {
>
> Can we remove this and other checks in this function by always passing
> a non-null value?  Using time_msec() wherever the stats are not available
> in the caller.
>
> And we may re-name the parameter to something like time_learned or something.
>
> > +            /* If the caller hasn't explicitly requested rule recreation 
> > and
>
> This is still outdated as in v4.
>
> > +             * the rule is removed and not visible, and the classifier has 
> > a
> > +             * newer version of this rule, then don't reinsert it. */
> > +            struct oftable *table = &rule->ofproto->tables[rule->table_id];
> > +            ovs_version_t tables_version = rule->ofproto->tables_version;
> > +
> > +            if (!cls_rule_visible_in_version(&rule->cr, tables_version)) {
> > +                const struct cls_rule *curr_cls_rule;
> > +
> > +                curr_cls_rule = classifier_find_rule_exactly(&table->cls,
> > +                                                             &rule->cr,
> > +                                                             
> > tables_version);
> > +                if (curr_cls_rule) {
> > +                    struct rule *curr_rule = 
> > rule_from_cls_rule(curr_cls_rule);
> > +
> > +                    ovs_mutex_lock(&curr_rule->mutex);
>
> Is it possible for two revalidators to deadlock each other by taking
> these mutexes in the opposite order?
>
> > +                    if (curr_rule->modified > stats_used) {
> > +                        ovs_mutex_unlock(&curr_rule->mutex);
> > +                        ovs_mutex_unlock(&rule->mutex);
> > +
> > +                        return OFPERR_OFPFMFC_UNKNOWN;
>
> This error will bubble up and will terminate the actions pipleine.  For 
> example,
> if a packet arrives from an upcall, we may have a race with a revalidator that
> is evaluating a competing learning flow.  If handler thread will get an error
> here it will stop processing the packet and further actions will not be 
> executed.
> The packet will likley be just dropped.
>
>
> > +                    }
> > +                    ovs_mutex_unlock(&curr_rule->mutex);
> > +                }
> > +            }
> > +        }
> > +
> > +        cls_rule_clone(&cr, &rule->cr);
> >          error = ofproto_rule_create(rule->ofproto, &cr, rule->table_id,
> >                                      rule->flow_cookie,
> >                                      rule->idle_timeout,
> > @@ -5506,6 +5535,9 @@ ofproto_flow_mod_learn_refresh(struct 
> > ofproto_flow_mod *ofm)
> >                                      rule->match_tlv_bitmap,
> >                                      rule->ofpacts_tlv_bitmap,
> >                                      &ofm->temp_rule);
> > +        if (stats_used) {
> > +            ofm->temp_rule->modified = stats_used;
> > +        }
> >          ovs_mutex_unlock(&rule->mutex);
> >          if (!error) {
> >              ofproto_rule_unref(rule);   /* Release old reference. */
> > @@ -5513,7 +5545,11 @@ ofproto_flow_mod_learn_refresh(struct 
> > ofproto_flow_mod *ofm)
> >      } else {
> >          /* Refresh the existing rule. */
> >          ovs_mutex_lock(&rule->mutex);
> > -        rule->modified = time_msec();
> > +        if (stats_used) {
> > +            rule->modified = stats_used;
> > +        } else {
> > +            rule->modified = time_msec();
> > +        }
> >          ovs_mutex_unlock(&rule->mutex);
> >      }
> >      return error;
> > @@ -5565,10 +5601,13 @@ ofproto_flow_mod_learn_finish(struct 
> > ofproto_flow_mod *ofm,
> >
> >  /* Refresh 'ofm->temp_rule', for which the caller holds a reference, if 
> > already
> >   * in the classifier, insert it otherwise.  If the rule has already been
> > - * removed from the classifier, a new rule is created using 
> > 'ofm->temp_rule' as
> > - * a template and the reference to the old 'ofm->temp_rule' is freed.  If
> > - * 'keep_ref' is true, then a reference to the current rule is held, 
> > otherwise
> > - * it is released and 'ofm->temp_rule' is set to NULL.
> > + * removed from the classifier and replaced by another rule, the stats_used
> > + * parameter can be used to determine whether the newer rule is replaced or
> > + * kept. If stats_used is set and greater than the current rule's last 
> > modified
> > + * time then a new rule is created using 'ofm->temp_rule' as a template 
> > and the
> > + * reference to the old 'ofm->temp_rule' is freed. If 'keep_ref' is true,
> > + * then a reference to the current rule is held, otherwise it is released 
> > and
> > + * 'ofm->temp_rule' is set to NULL.
> >   *
> >   * If 'limit' != 0, insertion will fail if there are more than 'limit' 
> > rules
> >   * in the same table with the same cookie.  If insertion succeeds,
> > @@ -5579,10 +5618,11 @@ ofproto_flow_mod_learn_finish(struct 
> > ofproto_flow_mod *ofm,
> >   * during the call. */
> >  enum ofperr
> >  ofproto_flow_mod_learn(struct ofproto_flow_mod *ofm, bool keep_ref,
> > -                       unsigned limit, bool *below_limitp)
> > +                       unsigned limit, bool *below_limitp,
> > +                       long long int stats_used)
> >      OVS_EXCLUDED(ofproto_mutex)
> >  {
> > -    enum ofperr error = ofproto_flow_mod_learn_refresh(ofm);
> > +    enum ofperr error = ofproto_flow_mod_learn_refresh(ofm, stats_used);
> >      struct rule *rule = ofm->temp_rule;
> >      bool below_limit = true;
> >
> > @@ -5615,6 +5655,13 @@ ofproto_flow_mod_learn(struct ofproto_flow_mod *ofm, 
> > bool keep_ref,
> >
> >              error = ofproto_flow_mod_learn_start(ofm);
> >              if (!error) {
> > +                if (stats_used) {
> > +                    /* ofproto_flow_mod_learn_start may have overwritten
> > +                     * modified with current time. */
> > +                    ovs_mutex_lock(&ofm->temp_rule->mutex);
> > +                    ofm->temp_rule->modified = stats_used;
> > +                    ovs_mutex_unlock(&ofm->temp_rule->mutex);
> > +                }
> >                  error = ofproto_flow_mod_learn_finish(ofm, NULL);
> >              }
> >          } else {
>

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to