On 8/22/23 22:45, Mike Pattrick wrote:
> When the a revalidator thread is updating statistics for an XC_LEARN
> xcache entry in xlate_push_stats_entry it uses ofproto_flow_mod_learn.
> The revalidator will update stats for rules even if they are in a
> removed state or marked as invisible. However, ofproto_flow_mod_learn
> will detect if a flow has been removed and re-add it in that case. This
> can result in an old learn action replacing the new learn action that
> had replaced it in the first place.
> 
> This change adds a new stats_used parameter to ofproto_flow_mod_learn
> allowing the caller to provide a timestamp that will be fed into the
> learned rule's modified time. The provided timestamp should be the time
> of the last packet activity. If stats_used is not set then the current
> time is used, as is the current behaviour.
> 
> This change also adds a check when replacing a learned rule to favour the
> newest rule.
> 
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2213892
> Signed-off-by: Mike Pattrick <[email protected]>
> ---
>  v2: Added additional checks if rule is removed
>  v3: v2 patch was corrupted in transit
>  v4: Added check against dpif flow stats
>  v5: Fixed typos, updated commit message
>      Changed timestamps to use datapath timestamp more consistently
> ---

I'll quote my previous review comment:

It should be possible to add a test for this issue using time
warping.  Please, add one.



>  ofproto/ofproto-dpif-xlate-cache.c |  2 +-
>  ofproto/ofproto-dpif-xlate.c       |  8 +++-
>  ofproto/ofproto-dpif.c             |  2 +-
>  ofproto/ofproto-provider.h         |  6 ++-
>  ofproto/ofproto.c                  | 65 +++++++++++++++++++++++++-----
>  5 files changed, 69 insertions(+), 14 deletions(-)
> 
> diff --git a/ofproto/ofproto-dpif-xlate-cache.c 
> b/ofproto/ofproto-dpif-xlate-cache.c
> index 9224ee2e6..2e1fcb3a6 100644
> --- a/ofproto/ofproto-dpif-xlate-cache.c
> +++ b/ofproto/ofproto-dpif-xlate-cache.c
> @@ -125,7 +125,7 @@ xlate_push_stats_entry(struct xc_entry *entry,
>      case XC_LEARN: {
>          enum ofperr error;
>          error = ofproto_flow_mod_learn(entry->learn.ofm, true,
> -                                       entry->learn.limit, NULL);
> +                                       entry->learn.limit, NULL, 
> stats->used);
>          if (error) {
>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>              VLOG_WARN_RL(&rl, "xcache LEARN action execution failed.");
> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> index 47ea0f47e..bcacb2f0a 100644
> --- a/ofproto/ofproto-dpif-xlate.c
> +++ b/ofproto/ofproto-dpif-xlate.c
> @@ -5700,8 +5700,14 @@ xlate_learn_action(struct xlate_ctx *ctx, const struct 
> ofpact_learn *learn)
>          if (!error) {
>              bool success = true;
>              if (ctx->xin->allow_side_effects) {
> +                long long int stats_used = 0;
> +
> +                if (ctx->xin->resubmit_stats) {
> +                    stats_used = ctx->xin->resubmit_stats->used;
> +                }
>                  error = ofproto_flow_mod_learn(ofm, ctx->xin->xcache != NULL,
> -                                               learn->limit, &success);
> +                                               learn->limit, &success,
> +                                               stats_used);
>              } else if (learn->limit) {
>                  if (!ofm->temp_rule
>                      || ofm->temp_rule->state != RULE_INSERTED) {
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index e22ca757a..1efd9fc91 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -4880,7 +4880,7 @@ packet_xlate(struct ofproto *ofproto_, struct 
> ofproto_packet_out *opo)
>              if (entry->type == XC_LEARN) {
>                  struct ofproto_flow_mod *ofm = entry->learn.ofm;
>  
> -                error = ofproto_flow_mod_learn_refresh(ofm);
> +                error = ofproto_flow_mod_learn_refresh(ofm, 0);
>                  if (error) {
>                      goto error_out;
>                  }
> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
> index 143ded690..dc5c1a60d 100644
> --- a/ofproto/ofproto-provider.h
> +++ b/ofproto/ofproto-provider.h
> @@ -2027,9 +2027,11 @@ enum ofperr ofproto_flow_mod_init_for_learn(struct 
> ofproto *,
>                                              struct ofproto_flow_mod *)
>      OVS_EXCLUDED(ofproto_mutex);
>  enum ofperr ofproto_flow_mod_learn(struct ofproto_flow_mod *, bool keep_ref,
> -                                   unsigned limit, bool *below_limit)
> +                                   unsigned limit, bool *below_limit,
> +                                   long long int stats_used)
>      OVS_EXCLUDED(ofproto_mutex);
> -enum ofperr ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm);
> +enum ofperr ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm,
> +                                           long long int stats_used);
>  enum ofperr ofproto_flow_mod_learn_start(struct ofproto_flow_mod *ofm)
>      OVS_REQUIRES(ofproto_mutex);
>  void ofproto_flow_mod_learn_revert(struct ofproto_flow_mod *ofm)
> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
> index dbf4958bc..92fafd9ad 100644
> --- a/ofproto/ofproto.c
> +++ b/ofproto/ofproto.c
> @@ -5472,7 +5472,8 @@ ofproto_flow_mod_init_for_learn(struct ofproto *ofproto,
>  }
>  
>  enum ofperr
> -ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm)
> +ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm,
> +                               long long int stats_used)
>  {
>      enum ofperr error = 0;
>  
> @@ -5494,8 +5495,36 @@ ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod 
> *ofm)
>      if (rule->state == RULE_REMOVED) {
>          struct cls_rule cr;
>  
> -        cls_rule_clone(&cr, &rule->cr);
>          ovs_mutex_lock(&rule->mutex);
> +        if (stats_used) {

Can we remove this and other checks in this function by always passing
a non-null value?  Using time_msec() wherever the stats are not available
in the caller.

And we may re-name the parameter to something like time_learned or something.

> +            /* If the caller hasn't explicitly requested rule recreation and

This is still outdated as in v4.

> +             * the rule is removed and not visible, and the classifier has a
> +             * newer version of this rule, then don't reinsert it. */
> +            struct oftable *table = &rule->ofproto->tables[rule->table_id];
> +            ovs_version_t tables_version = rule->ofproto->tables_version;
> +
> +            if (!cls_rule_visible_in_version(&rule->cr, tables_version)) {
> +                const struct cls_rule *curr_cls_rule;
> +
> +                curr_cls_rule = classifier_find_rule_exactly(&table->cls,
> +                                                             &rule->cr,
> +                                                             tables_version);
> +                if (curr_cls_rule) {
> +                    struct rule *curr_rule = 
> rule_from_cls_rule(curr_cls_rule);
> +
> +                    ovs_mutex_lock(&curr_rule->mutex);

Is it possible for two revalidators to deadlock each other by taking
these mutexes in the opposite order?

> +                    if (curr_rule->modified > stats_used) {
> +                        ovs_mutex_unlock(&curr_rule->mutex);
> +                        ovs_mutex_unlock(&rule->mutex);
> +
> +                        return OFPERR_OFPFMFC_UNKNOWN;

This error will bubble up and will terminate the actions pipleine.  For example,
if a packet arrives from an upcall, we may have a race with a revalidator that
is evaluating a competing learning flow.  If handler thread will get an error
here it will stop processing the packet and further actions will not be 
executed.
The packet will likley be just dropped.


> +                    }
> +                    ovs_mutex_unlock(&curr_rule->mutex);
> +                }
> +            }
> +        }
> +
> +        cls_rule_clone(&cr, &rule->cr);
>          error = ofproto_rule_create(rule->ofproto, &cr, rule->table_id,
>                                      rule->flow_cookie,
>                                      rule->idle_timeout,
> @@ -5506,6 +5535,9 @@ ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod 
> *ofm)
>                                      rule->match_tlv_bitmap,
>                                      rule->ofpacts_tlv_bitmap,
>                                      &ofm->temp_rule);
> +        if (stats_used) {
> +            ofm->temp_rule->modified = stats_used;
> +        }
>          ovs_mutex_unlock(&rule->mutex);
>          if (!error) {
>              ofproto_rule_unref(rule);   /* Release old reference. */
> @@ -5513,7 +5545,11 @@ ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod 
> *ofm)
>      } else {
>          /* Refresh the existing rule. */
>          ovs_mutex_lock(&rule->mutex);
> -        rule->modified = time_msec();
> +        if (stats_used) {
> +            rule->modified = stats_used;
> +        } else {
> +            rule->modified = time_msec();
> +        }
>          ovs_mutex_unlock(&rule->mutex);
>      }
>      return error;
> @@ -5565,10 +5601,13 @@ ofproto_flow_mod_learn_finish(struct ofproto_flow_mod 
> *ofm,
>  
>  /* Refresh 'ofm->temp_rule', for which the caller holds a reference, if 
> already
>   * in the classifier, insert it otherwise.  If the rule has already been
> - * removed from the classifier, a new rule is created using 'ofm->temp_rule' 
> as
> - * a template and the reference to the old 'ofm->temp_rule' is freed.  If
> - * 'keep_ref' is true, then a reference to the current rule is held, 
> otherwise
> - * it is released and 'ofm->temp_rule' is set to NULL.
> + * removed from the classifier and replaced by another rule, the stats_used
> + * parameter can be used to determine whether the newer rule is replaced or
> + * kept. If stats_used is set and greater than the current rule's last 
> modified
> + * time then a new rule is created using 'ofm->temp_rule' as a template and 
> the
> + * reference to the old 'ofm->temp_rule' is freed. If 'keep_ref' is true,
> + * then a reference to the current rule is held, otherwise it is released and
> + * 'ofm->temp_rule' is set to NULL.
>   *
>   * If 'limit' != 0, insertion will fail if there are more than 'limit' rules
>   * in the same table with the same cookie.  If insertion succeeds,
> @@ -5579,10 +5618,11 @@ ofproto_flow_mod_learn_finish(struct ofproto_flow_mod 
> *ofm,
>   * during the call. */
>  enum ofperr
>  ofproto_flow_mod_learn(struct ofproto_flow_mod *ofm, bool keep_ref,
> -                       unsigned limit, bool *below_limitp)
> +                       unsigned limit, bool *below_limitp,
> +                       long long int stats_used)
>      OVS_EXCLUDED(ofproto_mutex)
>  {
> -    enum ofperr error = ofproto_flow_mod_learn_refresh(ofm);
> +    enum ofperr error = ofproto_flow_mod_learn_refresh(ofm, stats_used);
>      struct rule *rule = ofm->temp_rule;
>      bool below_limit = true;
>  
> @@ -5615,6 +5655,13 @@ ofproto_flow_mod_learn(struct ofproto_flow_mod *ofm, 
> bool keep_ref,
>  
>              error = ofproto_flow_mod_learn_start(ofm);
>              if (!error) {
> +                if (stats_used) {
> +                    /* ofproto_flow_mod_learn_start may have overwritten
> +                     * modified with current time. */
> +                    ovs_mutex_lock(&ofm->temp_rule->mutex);
> +                    ofm->temp_rule->modified = stats_used;
> +                    ovs_mutex_unlock(&ofm->temp_rule->mutex);
> +                }
>                  error = ofproto_flow_mod_learn_finish(ofm, NULL);
>              }
>          } else {

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to