Re: [ovs-dev] [PATCH v4] ofproto-dpif-xlate: Don't reinstall removed XC_LEARN rule

Ilya Maximets Mon, 21 Aug 2023 11:23:38 -0700

On 8/21/23 14:31, Mike Pattrick wrote:
> On Mon, Aug 21, 2023 at 7:15 AM Ilya Maximets <[email protected]> wrote:
>>
>> On 8/18/23 00:46, Mike Pattrick wrote:
>>> When a revalidator thread is updating statistics for an XC_LEARN
>>> xcache entry in xlate_push_stats_entry it uses ofproto_flow_mod_learn.
>>> The revalidator will update stats for rules even if they are in a
>>> removed state or marked as invisible. However, ofproto_flow_mod_learn
>>> will detect if a flow has been removed and re-add it in that case. This
>>> can result in an old learn action replacing the new learn action that
>>> had replaced it in the first place.
>>>
>>> This change adds a new force parameter to ofproto_flow_mod_learn
>>> allowing the caller to specify an action to take if temp_rule is
>>> removed. If force is set to false and the rule has been replaced in the
>>> classifier with a more recent rule, then ofproto_flow_mod_learn will
>>> just return.
>>
>> The commit message is outdated.
>>
>> Also, it should be possible to add a test for this issue using time
>> warping.  Please, add one.
>>
>>>
>>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2213892
>>> Signed-off-by: Mike Pattrick <[email protected]>
>>> ---
>>>  v2: Added additional checks if rule is removed
>>>  v3: v2 patch was corrupted in transit
>>>  v4: Added check against dpif flow stats
>>> ---
>>>  ofproto/ofproto-dpif-xlate-cache.c |  2 +-
>>>  ofproto/ofproto-dpif-xlate.c       |  2 +-
>>>  ofproto/ofproto-dpif.c             |  2 +-
>>>  ofproto/ofproto-provider.h         |  6 ++--
>>>  ofproto/ofproto.c                  | 51 +++++++++++++++++++++++++-----
>>>  5 files changed, 50 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/ofproto/ofproto-dpif-xlate-cache.c 
>>> b/ofproto/ofproto-dpif-xlate-cache.c
>>> index 9224ee2e6..2e1fcb3a6 100644
>>> --- a/ofproto/ofproto-dpif-xlate-cache.c
>>> +++ b/ofproto/ofproto-dpif-xlate-cache.c
>>> @@ -125,7 +125,7 @@ xlate_push_stats_entry(struct xc_entry *entry,
>>>      case XC_LEARN: {
>>>          enum ofperr error;
>>>          error = ofproto_flow_mod_learn(entry->learn.ofm, true,
>>> -                                       entry->learn.limit, NULL);
>>> +                                       entry->learn.limit, NULL, 
>>> stats->used);
>>>          if (error) {
>>>              static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>>>              VLOG_WARN_RL(&rl, "xcache LEARN action execution failed.");
>>> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
>>> index 47ea0f47e..3a68cef4a 100644
>>> --- a/ofproto/ofproto-dpif-xlate.c
>>> +++ b/ofproto/ofproto-dpif-xlate.c
>>> @@ -5701,7 +5701,7 @@ xlate_learn_action(struct xlate_ctx *ctx, const 
>>> struct ofpact_learn *learn)
>>>              bool success = true;
>>>              if (ctx->xin->allow_side_effects) {
>>>                  error = ofproto_flow_mod_learn(ofm, ctx->xin->xcache != 
>>> NULL,
>>> -                                               learn->limit, &success);
>>> +                                               learn->limit, &success, 0);
>>>              } else if (learn->limit) {
>>>                  if (!ofm->temp_rule
>>>                      || ofm->temp_rule->state != RULE_INSERTED) {
>>> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
>>> index e22ca757a..1efd9fc91 100644
>>> --- a/ofproto/ofproto-dpif.c
>>> +++ b/ofproto/ofproto-dpif.c
>>> @@ -4880,7 +4880,7 @@ packet_xlate(struct ofproto *ofproto_, struct 
>>> ofproto_packet_out *opo)
>>>              if (entry->type == XC_LEARN) {
>>>                  struct ofproto_flow_mod *ofm = entry->learn.ofm;
>>>
>>> -                error = ofproto_flow_mod_learn_refresh(ofm);
>>> +                error = ofproto_flow_mod_learn_refresh(ofm, 0);
>>>                  if (error) {
>>>                      goto error_out;
>>>                  }
>>> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
>>> index 143ded690..dc5c1a60d 100644
>>> --- a/ofproto/ofproto-provider.h
>>> +++ b/ofproto/ofproto-provider.h
>>> @@ -2027,9 +2027,11 @@ enum ofperr ofproto_flow_mod_init_for_learn(struct 
>>> ofproto *,
>>>                                              struct ofproto_flow_mod *)
>>>      OVS_EXCLUDED(ofproto_mutex);
>>>  enum ofperr ofproto_flow_mod_learn(struct ofproto_flow_mod *, bool 
>>> keep_ref,
>>> -                                   unsigned limit, bool *below_limit)
>>> +                                   unsigned limit, bool *below_limit,
>>> +                                   long long int stats_used)
>>>      OVS_EXCLUDED(ofproto_mutex);
>>> -enum ofperr ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm);
>>> +enum ofperr ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm,
>>> +                                           long long int stats_used);
>>>  enum ofperr ofproto_flow_mod_learn_start(struct ofproto_flow_mod *ofm)
>>>      OVS_REQUIRES(ofproto_mutex);
>>>  void ofproto_flow_mod_learn_revert(struct ofproto_flow_mod *ofm)
>>> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
>>> index dbf4958bc..a86d6a58e 100644
>>> --- a/ofproto/ofproto.c
>>> +++ b/ofproto/ofproto.c
>>> @@ -5472,7 +5472,8 @@ ofproto_flow_mod_init_for_learn(struct ofproto 
>>> *ofproto,
>>>  }
>>>
>>>  enum ofperr
>>> -ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm)
>>> +ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm,
>>> +                               long long int stats_used)
>>>  {
>>>      enum ofperr error = 0;
>>>
>>> @@ -5494,8 +5495,37 @@ ofproto_flow_mod_learn_refresh(struct 
>>> ofproto_flow_mod *ofm)
>>>      if (rule->state == RULE_REMOVED) {
>>>          struct cls_rule cr;
>>>
>>> -        cls_rule_clone(&cr, &rule->cr);
>>>          ovs_mutex_lock(&rule->mutex);
>>> +        if (stats_used) {
>>> +            /* If the caller hasn't explicitly requested rule recreation 
>>> and
>>
>> Seems outdated.
>>
>>> +             * the rule is removed and not visible, and the classifier has 
>>> a
>>> +             * newer version of this rule, then don't reinsert it. */
>>> +            struct oftable *table = &rule->ofproto->tables[rule->table_id];
>>> +            ovs_version_t tables_version = rule->ofproto->tables_version;
>>> +
>>> +            if (!cls_rule_visible_in_version(&rule->cr, tables_version)) {
>>> +                const struct cls_rule * curr_cls_rule;
>>
>> No space between the star and the variable.
>>
>>> +
>>> +                curr_cls_rule = classifier_find_rule_exactly(&table->cls,
>>> +                                                             &rule->cr,
>>> +                                                             
>>> tables_version);
>>> +                if (curr_cls_rule) {
>>> +                    struct rule *curr_rule = 
>>> rule_from_cls_rule(curr_cls_rule);
>>> +
>>> +                    ovs_mutex_lock(&curr_rule->mutex);
>>> +                    if (curr_rule->state != RULE_REMOVED &&
>>> +                        curr_rule->modified > stats_used) {
>>
>> We're comparing apples to oranges here.  'modified' is a time of
>> the last rule refresh during revalidation, and 'stats_used' is
>> a last time packet hit the learning flow in the datapath.
>> The 'modified' value has to be set to 'stats_used' somewhere.
> 
> We update the modified timestamp in replace_rule_start(), so we only
> enter this new code path if the new rule was created after stats.used.


True, but we update it with time_msec(), not with the stats->used from
the learning datapath flow.  Also, a few lines below in this refresh
function we update it with time_msec() as well.  So we never update
the 'modified' timestamp with the datapath flow timestamp, but we do
compare the two in this new code.

Consider the following scenario:

Flow A learns flow C,actions=a,
Flow B learns flow C,actions=b,

So, we have both flows A and B in the datapath.  Assume that right now
the classifier has C,actions=a installed and C,actions=b in the removed
state.  At some point in time we receive a packet on flow A and, 10ms
later, we receive a packet on flow B.

Revalidation starts and A gets revalidated first: the refresh() function
sets (C,actions=a)->modified to time_msec(), which is always larger than
either A->used or B->used.  When it's time to revalidate B, the
refresh function will compare B->used with (C,actions=a)->modified and
will not reinstate C,actions=b, even though B saw packets 10ms after
A did.
If flows A and B are revalidated in the opposite order, we will get
the opposite result, i.e. C,actions=b will be reinstated.

So, the current patch doesn't really solve the undefined behavior.

Best regards, Ilya Maximets.

> 
> 
> Thanks,
> M
> 
>>
>>> +                        ovs_mutex_unlock(&curr_rule->mutex);
>>> +                        ovs_mutex_unlock(&rule->mutex);
>>> +
>>> +                        return OFPERR_OFPFMFC_UNKNOWN;
>>> +                    }
>>> +                    ovs_mutex_unlock(&curr_rule->mutex);
>>> +                }
>>> +            }
>>> +        }
>>> +
>>> +        cls_rule_clone(&cr, &rule->cr);
>>>          error = ofproto_rule_create(rule->ofproto, &cr, rule->table_id,
>>>                                      rule->flow_cookie,
>>>                                      rule->idle_timeout,
>>> @@ -5565,10 +5595,13 @@ ofproto_flow_mod_learn_finish(struct 
>>> ofproto_flow_mod *ofm,
>>>
>>>  /* Refresh 'ofm->temp_rule', for which the caller holds a reference, if 
>>> already
>>>   * in the classifier, insert it otherwise.  If the rule has already been
>>> - * removed from the classifier, a new rule is created using 
>>> 'ofm->temp_rule' as
>>> - * a template and the reference to the old 'ofm->temp_rule' is freed.  If
>>> - * 'keep_ref' is true, then a reference to the current rule is held, 
>>> otherwise
>>> - * it is released and 'ofm->temp_rule' is set to NULL.
>>> + * removed from the classifier and replaced by another rule, the stats_used
>>> + * parameter can be used to deptermine whether the newer rule is replaced 
>>> or
>>
>> Typo: deptermine
>>
>>> + * kept. If stats_used is set and greater than the current rule's last 
>>> modified
>>> + * time then a new rule is created using 'ofm->temp_rule' as a template 
>>> and the
>>> + * reference to the old 'ofm->temp_rule' is freed. If 'keep_ref' is true,
>>> + * then a reference to the current rule is held, otherwise it is released 
>>> and
>>> + * 'ofm->temp_rule' is set to NULL.
>>>   *
>>>   * If 'limit' != 0, insertion will fail if there are more than 'limit' 
>>> rules
>>>   * in the same table with the same cookie.  If insertion succeeds,
>>> @@ -5579,10 +5612,11 @@ ofproto_flow_mod_learn_finish(struct 
>>> ofproto_flow_mod *ofm,
>>>   * during the call. */
>>>  enum ofperr
>>>  ofproto_flow_mod_learn(struct ofproto_flow_mod *ofm, bool keep_ref,
>>> -                       unsigned limit, bool *below_limitp)
>>> +                       unsigned limit, bool *below_limitp,
>>> +                       long long int stats_used)
>>>      OVS_EXCLUDED(ofproto_mutex)
>>>  {
>>> -    enum ofperr error = ofproto_flow_mod_learn_refresh(ofm);
>>> +    enum ofperr error = ofproto_flow_mod_learn_refresh(ofm, stats_used);
>>>      struct rule *rule = ofm->temp_rule;
>>>      bool below_limit = true;
>>>
>>> @@ -5659,6 +5693,7 @@ replace_rule_start(struct ofproto *ofproto, struct 
>>> ofproto_flow_mod *ofm,
>>>                  *CONST_CAST(uint16_t *, &new_rule->importance) = 
>>> old_rule->importance;
>>>                  new_rule->flags = old_rule->flags;
>>>                  new_rule->created = old_rule->created;
>>> +                new_rule->modified = time_msec();
>>>              }
>>>              if (!change_cookie) {
>>>                  *CONST_CAST(ovs_be64 *, &new_rule->flow_cookie)
>>> --
>>> 2.39.3
>>
> 

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Re: [ovs-dev] [PATCH v4] ofproto-dpif-xlate: Don't reinstall removed XC_LEARN rule

Reply via email to