On Mon, Mar 28, 2022 at 3:15 PM Eelco Chaudron <[email protected]> wrote:

>
>
> On 25 Mar 2022, at 18:05, Tao Liu wrote:
>
> > On Fri, Mar 25, 2022 at 12:08:20PM +0100, Eelco Chaudron wrote:
> >>
> >>
> >> On 22 Mar 2022, at 13:04, Tao Liu wrote:
> >>
> >>> If netdev goes away(i.e. qemu with a vnet shutdown), kernel delete all
> tc
> >>> filters, those tcf_id related to the netdev will be left in ufid_to_tc
> >>> hashmap.
> >>> When qemu restart with a same vnet but different ifindex assigned,
> >>> a dup ufid may add. Especially after hashmap_expand, the old entry will
> >>> insert before the new one, delete or modify tc flow will always fail.
> >>>
> >>> So delete the stale entry to avoid a duplicated ufid in hashmap.
> >>
> >> There are some comments below, as I still do not fully understand the
> fix.
> >>
> > Please let me explain more.
> > As the scenery of qemu restart described above, filters are flushed by
> > kernel, but tcf_ids remain in ufid_to_tc hashmap.
> >
> > Considering a ping running continuously from other host to VM, the first
> > reply from VM triggers upcall after qemu restart. A same ufid is
> generated
> > if the request processed on same handler. At this time, an old ufid
> exists
> > in hashmap, and flow del returns ENODEV(ifindex is wrong).
>
> Can this not be solved by modifying the previous fix to this?
>
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -198,7 +198,9 @@ del_filter_and_ufid_mapping(struct tcf_id *id, const
> ovs_u128 *ufid)
>      int err;
>
>      err = tc_del_filter(id);
> -    if (!err) {
> +    if (!err || err = ENODEV) {
>         del_ufid_tc_mapping(ufid);
>      }
>      return err;
>  }
>
>
> > However flow put
> > still succeeds because there is no filter in kernel. And a dup ufid adds
> in
> > hashmap.
> >
> > After hashmap_expand, the old entry inserts before the new one.
> >
> > If flow expires, revalidator fails to del flow bacause we always get the
> > old entry. There are constant warnings in ovs-vswitch.log:
> >
> >     dpif(revalidator49)|WARN|system@ovs-system: failed to flow_del (No
> such
> >     file or directory) ufid:d81939b3-8c8d-4b35-8d58-88c098709b91 ...
> >
> > In case of flow mod, flow del inside flow put still fails with ENODEV,
> and
> > tc_replace_flower also fails because old filter exists in kernel.
> >
> > The idea of this patch is that, if a filter can't get from kernel, we can
> > just delete the mapping, and no need to call del filter as it does not
> > exist. In this way, we can keep consistent between kernel and userspace.
>
> Thanks for the details, and I think the problem is a bit more complex.
>
> As in theory duplicate ufid's could exist (guess even on the same port ;).
> We could also fix this by including the interface in the lookup, in
> get_ufid_tc_mapping()?
>

Not sure how OVS deals with the uniqueness of the ufid, if they assume its
unique within the system, the fix can be simplified to something like this:

--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c

@@ -218,8 +222,21 @@ add_ufid_tc_mapping(struct netdev *netdev, const
ovs_u128 *ufid,
 {
     struct ufid_tc_data *new_data = xzalloc(sizeof *new_data);
     size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
+    struct tcf_id existing_id;
     size_t tc_hash;

+    /* ADD COMMENT WHY ITS SAFE TO DELETE THIS. */
+    if (!get_ufid_tc_mapping(ufid, &existing_id)) {
+        del_ufid_tc_mapping(ufid);
+    }
+
     tc_hash = hash_int(hash_int(id->prio, id->handle), id->ifindex);
     tc_hash = hash_int(id->chain, tc_hash);



> For example something like this:
>
> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 12d0a9af3..8be3c5dc3 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -239,14 +239,15 @@ add_ufid_tc_mapping(struct netdev *netdev, const
> ovs_u128 *ufid,
>   * Otherwise returns the error.
>   */
>  static int
> -get_ufid_tc_mapping(const ovs_u128 *ufid, struct tcf_id *id)
> +get_ufid_tc_mapping(struct netdev *netdev, const ovs_u128 *ufid,
> +                    struct tcf_id *id)
>  {
>      size_t ufid_hash = hash_bytes(ufid, sizeof *ufid, 0);
>      struct ufid_tc_data *data;
>
>      ovs_mutex_lock(&ufid_lock);
>      HMAP_FOR_EACH_WITH_HASH (data, ufid_to_tc_node, ufid_hash,
> &ufid_to_tc) {
> -        if (ovs_u128_equals(*ufid, data->ufid)) {
> +        if (netdev == data->netdev && ovs_u128_equals(*ufid, data->ufid)
> ) {
>              *id = data->id;
>              ovs_mutex_unlock(&ufid_lock);
>              return 0;
> @@ -1943,7 +1944,7 @@ netdev_tc_flow_put(struct netdev *netdev, struct
> match *match,
>          return EOPNOTSUPP;
>      }
>
> -    if (get_ufid_tc_mapping(ufid, &id) == 0) {
> +    if (get_ufid_tc_mapping(netdev, ufid, &id) == 0) {
>          VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d",
>                      id.handle, id.prio);
>          info->tc_modify_flow_deleted = !del_filter_and_ufid_mapping(&id,
> ufid);
> @@ -1986,7 +1987,7 @@ netdev_tc_flow_get(struct netdev *netdev,
>      struct tcf_id id;
>      int err;
>
> -    err = get_ufid_tc_mapping(ufid, &id);
> +    err = get_ufid_tc_mapping(netdev, ufid, &id);
>      if (err) {
>          return err;
>      }
> @@ -2013,7 +2014,7 @@ netdev_tc_flow_get(struct netdev *netdev,
>  }
>
>  static int
> -netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
> +netdev_tc_flow_del(struct netdev *netdev,
>                     const ovs_u128 *ufid,
>                     struct dpif_flow_stats *stats)
>  {
> @@ -2021,7 +2022,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
>      struct tcf_id id;
>      int error;
>
> -    error = get_ufid_tc_mapping(ufid, &id);
> +    error = get_ufid_tc_mapping(netdev, ufid, &id);
>      if (error) {
>          return error;
>      }
>
> Hover this does not fix the root cause (same with your fix below). We need
> a way to delete these stale mappings. Maybe we can watch some netlink
> messages, or re-use the netdev layer, but I think we hold a reference in
> the mapping so we won't delete either.
>
> I think this is what we should try to fix, not working around the leftover
> entries.
>
> >>> Fixes: dd8ca104acd7 ("netdev-tc-offloads: Don't delete ufid mapping if
> fail to delete filter")))
> >>> Signed-off-by: Tao Liu <[email protected]>
> >>> ---
> >>>  lib/netdev-offload-tc.c | 37 ++++++++++++++++++++++---------------
> >>>  1 file changed, 22 insertions(+), 15 deletions(-)
> >>>
> >>> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> >>> index 12d0a9af3..e9dd66790 100644
> >>> --- a/lib/netdev-offload-tc.c
> >>> +++ b/lib/netdev-offload-tc.c
> >>> @@ -167,6 +167,9 @@ struct ufid_tc_data {
> >>>      struct netdev *netdev;
> >>>  };
> >>>
> >>> +static void parse_tc_flower_to_stats(struct tc_flower *flower,
> >>> +                                     struct dpif_flow_stats *stats);
> >>> +
> >>>  static void
> >>>  del_ufid_tc_mapping_unlocked(const ovs_u128 *ufid)
> >>>  {
> >>> @@ -200,11 +203,24 @@ del_ufid_tc_mapping(const ovs_u128 *ufid)
> >>>
> >>>  /* Wrapper function to delete filter and ufid tc mapping */
> >>>  static int
> >>> -del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid)
> >>> +del_filter_and_ufid_mapping(struct tcf_id *id, const ovs_u128 *ufid,
> >>> +                            struct dpif_flow_stats *stats)
> >>>  {
> >>> -    int err;
> >>> +    struct tc_flower flower;
> >>> +    int err = 0, get_err;
> >>> +
> >>> +    get_err = tc_get_flower(id, &flower);
> >>> +    if (!get_err) {
> >>
> >> We should definitely add a comment here explaining the reasoning behind
> doing the get first and then the delete!
> >>
> > Will add it.
> >
> >> However, it’s still not clear to me why this change does not undo,
> dd8ca104acd7?
> >>
> >> Because tc_get_flower() just finds out if the entry with the ID exists
> in the kernel, and than only try to delete it.
> >>
> >> For this can you just not check a specific return value in the
> tc_del_filter() call?
> >>
> >> I assume dd8ca104acd7 just want to skip removing the
> del_ufid_tc_mapping() if the entry does NOT exist in the kernel?
> >> If not maybe Jianbo can explain a bit more the corner case, or maybe
> has a test case for this?
> >>
> > commit dd8ca104acd7 prevents the case of mapping deleted but filter
> exists
> > in kernel(del fails). This patch includes both caeses of the
> inconsistency.
> >
> >>> +        err = tc_del_filter(id);
> >>> +    }
> >>> +
> >>> +    if (stats) {
> >>> +        memset(stats, 0, sizeof *stats);
> >>> +        if (!get_err) {
> >>> +            parse_tc_flower_to_stats(&flower, stats);
> >>> +        }
> >>> +    }
> >>
> >> I guess this was moved here to avoid the additional tc_get_flower()
> call below.
> >>
> > Yes, it is.
> >
> >> I would fold the stats part in the above if() case to avoid the need
> for the get_err variable, i.e.
> >>
> >>     if (!tc_get_flower(id, &flower)) {
> >>       err = tc_del_filter(id);
> >>       if (stats) {
> >>         memset(stats, 0, sizeof *stats);
> >>         if (!get_err) {
> >>             parse_tc_flower_to_stats(&flower, stats);
> >>         }
> >>       }
> >>
> > We also need to zero stats if tc_get_flower fails.
> >
> >>
> >>>
> >>> -    err = tc_del_filter(id);
> >>>      if (!err) {
> >>>          del_ufid_tc_mapping(ufid);
> >>>      }
> >>> @@ -1946,7 +1962,8 @@ netdev_tc_flow_put(struct netdev *netdev, struct
> match *match,
> >>>      if (get_ufid_tc_mapping(ufid, &id) == 0) {
> >>>          VLOG_DBG_RL(&rl, "updating old handle: %d prio: %d",
> >>>                      id.handle, id.prio);
> >>> -        info->tc_modify_flow_deleted =
> !del_filter_and_ufid_mapping(&id, ufid);
> >>> +        info->tc_modify_flow_deleted =
> >>> +                    !del_filter_and_ufid_mapping(&id, ufid, NULL);
> >>>      }
> >>>
> >>>      prio = get_prio_for_tc_flower(&flower);
> >>> @@ -2017,7 +2034,6 @@ netdev_tc_flow_del(struct netdev *netdev
> OVS_UNUSED,
> >>>                     const ovs_u128 *ufid,
> >>>                     struct dpif_flow_stats *stats)
> >>>  {
> >>> -    struct tc_flower flower;
> >>>      struct tcf_id id;
> >>>      int error;
> >>>
> >>> @@ -2026,16 +2042,7 @@ netdev_tc_flow_del(struct netdev *netdev
> OVS_UNUSED,
> >>>          return error;
> >>>      }
> >>>
> >>> -    if (stats) {
> >>> -        memset(stats, 0, sizeof *stats);
> >>> -        if (!tc_get_flower(&id, &flower)) {
> >>> -            parse_tc_flower_to_stats(&flower, stats);
> >>> -        }
> >>> -    }
> >>> -
> >>> -    error = del_filter_and_ufid_mapping(&id, ufid);
> >>> -
> >>> -    return error;
> >>> +    return del_filter_and_ufid_mapping(&id, ufid, stats);
> >>>  }
> >>>
> >>>  static int
> >>> --
> >>> 2.23.0
> >>>
> >>> _______________________________________________
> >>> dev mailing list
> >>> [email protected]
> >>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >>
> >>
> >
> > --
> > Best regards,
> > Tao Liu
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to