On 8/21/23 14:31, Mike Pattrick wrote:
> On Mon, Aug 21, 2023 at 7:15 AM Ilya Maximets <[email protected]> wrote:
>>
>> On 8/18/23 00:46, Mike Pattrick wrote:
>>> When the a revalidator thread is updating statistics for an XC_LEARN
>>> xcache entry in xlate_push_stats_entry it uses ofproto_flow_mod_learn.
>>> The revalidator will update stats for rules even if they are in a
>>> removed state or marked as invisible. However, ofproto_flow_mod_learn
>>> will detect if a flow has been removed and re-add it in that case. This
>>> can result in an old learn action replacing the new learn action that
>>> had replaced it in the first place.
>>>
>>> This change adds a new force parameter to ofproto_flow_mod_learn
>>> allowing the caller to specify an action to take if temp_rule is
>>> removed. If force is set to false and the rule has been replaced in the
>>> classifier with a more recent rule, then ofproto_flow_mod_learn will
>>> just return.
>>
>> The commit message is outdated.
>>
>> Also, it should be possible to add a test for this issue using time
>> warping. Please, add one.
>>
>>>
>>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2213892
>>> Signed-off-by: Mike Pattrick <[email protected]>
>>> ---
>>> v2: Added additional checks if rule is removed
>>> v3: v2 patch was corrupted in transit
>>> v4: Added check against dpif flow stats
>>> ---
>>> ofproto/ofproto-dpif-xlate-cache.c | 2 +-
>>> ofproto/ofproto-dpif-xlate.c | 2 +-
>>> ofproto/ofproto-dpif.c | 2 +-
>>> ofproto/ofproto-provider.h | 6 ++--
>>> ofproto/ofproto.c | 51 +++++++++++++++++++++++++-----
>>> 5 files changed, 50 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/ofproto/ofproto-dpif-xlate-cache.c
>>> b/ofproto/ofproto-dpif-xlate-cache.c
>>> index 9224ee2e6..2e1fcb3a6 100644
>>> --- a/ofproto/ofproto-dpif-xlate-cache.c
>>> +++ b/ofproto/ofproto-dpif-xlate-cache.c
>>> @@ -125,7 +125,7 @@ xlate_push_stats_entry(struct xc_entry *entry,
>>> case XC_LEARN: {
>>> enum ofperr error;
>>> error = ofproto_flow_mod_learn(entry->learn.ofm, true,
>>> - entry->learn.limit, NULL);
>>> + entry->learn.limit, NULL,
>>> stats->used);
>>> if (error) {
>>> static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
>>> VLOG_WARN_RL(&rl, "xcache LEARN action execution failed.");
>>> diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
>>> index 47ea0f47e..3a68cef4a 100644
>>> --- a/ofproto/ofproto-dpif-xlate.c
>>> +++ b/ofproto/ofproto-dpif-xlate.c
>>> @@ -5701,7 +5701,7 @@ xlate_learn_action(struct xlate_ctx *ctx, const
>>> struct ofpact_learn *learn)
>>> bool success = true;
>>> if (ctx->xin->allow_side_effects) {
>>> error = ofproto_flow_mod_learn(ofm, ctx->xin->xcache !=
>>> NULL,
>>> - learn->limit, &success);
>>> + learn->limit, &success, 0);
>>> } else if (learn->limit) {
>>> if (!ofm->temp_rule
>>> || ofm->temp_rule->state != RULE_INSERTED) {
>>> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
>>> index e22ca757a..1efd9fc91 100644
>>> --- a/ofproto/ofproto-dpif.c
>>> +++ b/ofproto/ofproto-dpif.c
>>> @@ -4880,7 +4880,7 @@ packet_xlate(struct ofproto *ofproto_, struct
>>> ofproto_packet_out *opo)
>>> if (entry->type == XC_LEARN) {
>>> struct ofproto_flow_mod *ofm = entry->learn.ofm;
>>>
>>> - error = ofproto_flow_mod_learn_refresh(ofm);
>>> + error = ofproto_flow_mod_learn_refresh(ofm, 0);
>>> if (error) {
>>> goto error_out;
>>> }
>>> diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
>>> index 143ded690..dc5c1a60d 100644
>>> --- a/ofproto/ofproto-provider.h
>>> +++ b/ofproto/ofproto-provider.h
>>> @@ -2027,9 +2027,11 @@ enum ofperr ofproto_flow_mod_init_for_learn(struct
>>> ofproto *,
>>> struct ofproto_flow_mod *)
>>> OVS_EXCLUDED(ofproto_mutex);
>>> enum ofperr ofproto_flow_mod_learn(struct ofproto_flow_mod *, bool
>>> keep_ref,
>>> - unsigned limit, bool *below_limit)
>>> + unsigned limit, bool *below_limit,
>>> + long long int stats_used)
>>> OVS_EXCLUDED(ofproto_mutex);
>>> -enum ofperr ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm);
>>> +enum ofperr ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm,
>>> + long long int stats_used);
>>> enum ofperr ofproto_flow_mod_learn_start(struct ofproto_flow_mod *ofm)
>>> OVS_REQUIRES(ofproto_mutex);
>>> void ofproto_flow_mod_learn_revert(struct ofproto_flow_mod *ofm)
>>> diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
>>> index dbf4958bc..a86d6a58e 100644
>>> --- a/ofproto/ofproto.c
>>> +++ b/ofproto/ofproto.c
>>> @@ -5472,7 +5472,8 @@ ofproto_flow_mod_init_for_learn(struct ofproto
>>> *ofproto,
>>> }
>>>
>>> enum ofperr
>>> -ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm)
>>> +ofproto_flow_mod_learn_refresh(struct ofproto_flow_mod *ofm,
>>> + long long int stats_used)
>>> {
>>> enum ofperr error = 0;
>>>
>>> @@ -5494,8 +5495,37 @@ ofproto_flow_mod_learn_refresh(struct
>>> ofproto_flow_mod *ofm)
>>> if (rule->state == RULE_REMOVED) {
>>> struct cls_rule cr;
>>>
>>> - cls_rule_clone(&cr, &rule->cr);
>>> ovs_mutex_lock(&rule->mutex);
>>> + if (stats_used) {
>>> + /* If the caller hasn't explicitly requested rule recreation
>>> and
>>
>> Seems outdated.
>>
>>> + * the rule is removed and not visible, and the classifier has
>>> a
>>> + * newer version of this rule, then don't reinsert it. */
>>> + struct oftable *table = &rule->ofproto->tables[rule->table_id];
>>> + ovs_version_t tables_version = rule->ofproto->tables_version;
>>> +
>>> + if (!cls_rule_visible_in_version(&rule->cr, tables_version)) {
>>> + const struct cls_rule * curr_cls_rule;
>>
>> No space between the star and the variable.
>>
>>> +
>>> + curr_cls_rule = classifier_find_rule_exactly(&table->cls,
>>> + &rule->cr,
>>> +
>>> tables_version);
>>> + if (curr_cls_rule) {
>>> + struct rule *curr_rule =
>>> rule_from_cls_rule(curr_cls_rule);
>>> +
>>> + ovs_mutex_lock(&curr_rule->mutex);
>>> + if (curr_rule->state != RULE_REMOVED &&
>>> + curr_rule->modified > stats_used) {
>>
>> We're comparing apples to oranges here. 'modified' is a time of
>> the last rule refresh during revalidation, and 'stats_used' is
>> a last time packet hit the learning flow in the datapath.
>> The 'modified' value has to be set to 'stats_used' somewhere.
>
> We update the modified timestamp in replace_rule_start(), so we only
> enter this new code path if the new rule was created after stats.used.
True, but we update it with time_msec(), not the stats->used from
the learning datapath flow. Also, a few lines below in this refresh
function we update it with time_msec() as well. So we never update
the 'modified' timestamp with datapath flow timestamp, but we do
compare them in this new code.
Consider following scenario:
Flow A learns flow C,actions=a,
Flow B learns flow C,actions=b,
So, we have both flows A and B in the datapath. Assuming, right now
the classifier has C,actions=a installed and C,actions=b in removed
state. At some point in time we receive a packet on flow A and in
10ms we receive a packet on flow B.
Revalidation starts, A gets revalidated first, refresh() function sets
the (C,actions=a)->modified to time_msec(), which is always larger than
any of the A->used or B->used. When it's time to revalidate B, the
refresh function will compare B->used with (C,actions=a)->modified and
will not reinstate C,actions=b, even though B saw packets 10ms after
these packets were seen by A.
If the flows A and B will be revalidated in the opposite order, we will
have an opposite result, i.e. C,actions=b reinstated.
So, the current patch doesn't really solve the undefined behavior.
Best regards, Ilya Maximets.
>
>
> Thanks,
> M
>
>>
>>> + ovs_mutex_unlock(&curr_rule->mutex);
>>> + ovs_mutex_unlock(&rule->mutex);
>>> +
>>> + return OFPERR_OFPFMFC_UNKNOWN;
>>> + }
>>> + ovs_mutex_unlock(&curr_rule->mutex);
>>> + }
>>> + }
>>> + }
>>> +
>>> + cls_rule_clone(&cr, &rule->cr);
>>> error = ofproto_rule_create(rule->ofproto, &cr, rule->table_id,
>>> rule->flow_cookie,
>>> rule->idle_timeout,
>>> @@ -5565,10 +5595,13 @@ ofproto_flow_mod_learn_finish(struct
>>> ofproto_flow_mod *ofm,
>>>
>>> /* Refresh 'ofm->temp_rule', for which the caller holds a reference, if
>>> already
>>> * in the classifier, insert it otherwise. If the rule has already been
>>> - * removed from the classifier, a new rule is created using
>>> 'ofm->temp_rule' as
>>> - * a template and the reference to the old 'ofm->temp_rule' is freed. If
>>> - * 'keep_ref' is true, then a reference to the current rule is held,
>>> otherwise
>>> - * it is released and 'ofm->temp_rule' is set to NULL.
>>> + * removed from the classifier and replaced by another rule, the stats_used
>>> + * parameter can be used to deptermine whether the newer rule is replaced
>>> or
>>
>> Typo: deptermine
>>
>>> + * kept. If stats_used is set and greater than the current rule's last
>>> modified
>>> + * time then a new rule is created using 'ofm->temp_rule' as a template
>>> and the
>>> + * reference to the old 'ofm->temp_rule' is freed. If 'keep_ref' is true,
>>> + * then a reference to the current rule is held, otherwise it is released
>>> and
>>> + * 'ofm->temp_rule' is set to NULL.
>>> *
>>> * If 'limit' != 0, insertion will fail if there are more than 'limit'
>>> rules
>>> * in the same table with the same cookie. If insertion succeeds,
>>> @@ -5579,10 +5612,11 @@ ofproto_flow_mod_learn_finish(struct
>>> ofproto_flow_mod *ofm,
>>> * during the call. */
>>> enum ofperr
>>> ofproto_flow_mod_learn(struct ofproto_flow_mod *ofm, bool keep_ref,
>>> - unsigned limit, bool *below_limitp)
>>> + unsigned limit, bool *below_limitp,
>>> + long long int stats_used)
>>> OVS_EXCLUDED(ofproto_mutex)
>>> {
>>> - enum ofperr error = ofproto_flow_mod_learn_refresh(ofm);
>>> + enum ofperr error = ofproto_flow_mod_learn_refresh(ofm, stats_used);
>>> struct rule *rule = ofm->temp_rule;
>>> bool below_limit = true;
>>>
>>> @@ -5659,6 +5693,7 @@ replace_rule_start(struct ofproto *ofproto, struct
>>> ofproto_flow_mod *ofm,
>>> *CONST_CAST(uint16_t *, &new_rule->importance) =
>>> old_rule->importance;
>>> new_rule->flags = old_rule->flags;
>>> new_rule->created = old_rule->created;
>>> + new_rule->modified = time_msec();
>>> }
>>> if (!change_cookie) {
>>> *CONST_CAST(ovs_be64 *, &new_rule->flow_cookie)
>>> --
>>> 2.39.3
>>
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev