man. 5. jun. 2023, 23:02 skrev Ilya Maximets <[email protected]>:

> On 5/31/23 15:49, Frode Nordahl wrote:
> > The tc module combines the use of the `tc_transact` helper
> > function for communication with the in-kernel tc infrastructure
> > with assertions on the reply data by `ofpbuf_at_assert` on the
> > received data prior to further processing.
> >
> > With the presence of bugs on the kernel side, we need to treat
> > the kernel as an unreliable service provider and replace assertions
> > on the reply from it with checks to avoid a fatal crash of OVS.
> >
> > For the record, the symptom of the crash is this in the log:
> > EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <=
> b->size failed in ofpbuf_at_assert()
> >
> > And an excerpt of the backtrace looks like this:
> > 0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20,
> b=0x7fb650005d20, offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194
> > tc_replace_flower (id=<optimized out>, flower=<optimized out>) at
> ../lib/tc.c:3223
> > 0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840,
> match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>,
> > ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at
> ../lib/netdev-offload-tc.c:2096
> > 0x0000561dac117541 in netdev_flow_put (stats=<optimized out>,
> info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>,
> actions=<optimized out>,
> > match=0x7fb65b7ba980, netdev=0x561dacf91840) at
> ../lib/netdev-offload.c:257
> > parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at
> ../lib/dpif-netlink.c:2297
> > try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at
> ../lib/dpif-netlink.c:2384
> >
> > Reported-At: https://launchpad.net/bugs/2018500
> > Fixes: f98e418fbdb6 ("tc: Add tc flower functions")
> > Signed-off-by: Frode Nordahl <[email protected]>
> > ---
> >  lib/netdev-linux.c      |  7 ++---
> >  lib/netdev-offload-tc.c |  8 ++++-
> >  lib/tc.c                | 65 +++++++++++++++++++++++++++++++----------
> >  3 files changed, 60 insertions(+), 20 deletions(-)
>
> Thanks for the update!  This version looks mostly fine to me.
> See some comments inline.
>

Thanks for taking the time to review, I have one question below and will
provide an update asap.

Best regards, Ilya Maximets.
>
> >
> > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> > index 36620199e..0a1a08527 100644
> > --- a/lib/netdev-linux.c
> > +++ b/lib/netdev-linux.c
> > @@ -6235,8 +6235,7 @@ tc_query_qdisc(const struct netdev *netdev_)
> >       * On Linux 2.6.35+ we use the straightforward method because it
> allows us
> >       * to handle non-builtin qdiscs without handle 1:0 (e.g. codel).
> However,
> >       * in such a case we get no response at all from the kernel (!) if a
> > -     * builtin qdisc is in use (which is later caught by "!error &&
> > -     * !qdisc->size"). */
> > +     * builtin qdisc is in use (which is later caught by "error ==
> EPROTO" */
> >      tcmsg = netdev_linux_tc_make_request(netdev_, RTM_GETQDISC,
> NLM_F_ECHO,
> >                                           &request);
> >      if (!tcmsg) {
> > @@ -6247,7 +6246,7 @@ tc_query_qdisc(const struct netdev *netdev_)
> >
> >      /* Figure out what tc class to instantiate. */
> >      error = tc_transact(&request, &qdisc);
> > -    if (!error && qdisc->size) {
> > +    if (!error) {
> >          const char *kind;
> >
> >          error = tc_parse_qdisc(qdisc, &kind, NULL);
> > @@ -6262,7 +6261,7 @@ tc_query_qdisc(const struct netdev *netdev_)
> >                  ops = &tc_ops_other;
> >              }
> >          }
> > -    } else if ((!error && !qdisc->size) || error == ENOENT) {
> > +    } else if (error == ENOENT || error == EPROTO) {
> >          /* Either it's a built-in qdisc, or (on Linux pre-2.6.35) it's
> a qdisc
> >           * set up by some other entity that doesn't have a handle 1:0.
> We will
> >           * assume that it's the system default qdisc. */
>
> There are 3 more cases of ofpbuf_at_assert() in this file:
>   - tc_update_policer_action_stats
>   - tc_parse_class
>   - tc_add_matchall_policer
>
> First two already have a length check before asserting, so changes are not
> strictly necessary.  The last one though asserts right after tc_transact,
> so
> needs fixing.  It doesn't seem to do anything useful with the reply, so
> maybe
> we need to just delete that access or not provide the 'reply' pointer in
> the
> first place.
>

I must have missed those as we changed approach from v4 to v5, sorry about
that. Will check and update!

> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> > index 4f26dd8cc..f89aa3270 100644
> > --- a/lib/netdev-offload-tc.c
> > +++ b/lib/netdev-offload-tc.c
> > @@ -1247,7 +1247,13 @@ parse_tc_flower_to_match(const struct netdev
> *netdev,
> >      }
> >      nl_msg_end_nested(buf, act_off);
> >
> > -    *actions = ofpbuf_at_assert(buf, act_off, sizeof(struct nlattr));
> > +    struct nlattr *act = ofpbuf_at(buf, act_off, sizeof *act);
> > +    if (!act) {
> > +        VLOG_ERR_RL(&error_rl, "failed to parse flower, unexpectedly
> found no "
> > +                    "actions.");
> > +        return -EPROTO;
> > +    }
>
> We can keep an assertion here.  It's not a buffer received from the kernel,
> it's a buffer we're creating.  Also, nl_msg_end_nested() on a previous line
> already called the same ofpbuf_at_assert(), so it should not be possible
> to reach this check.
>

Ack

> +    *actions = act;
> >
> >      parse_tc_flower_to_stats(flower, stats);
> >      parse_tc_flower_to_attrs(flower, attrs);
> > diff --git a/lib/tc.c b/lib/tc.c
> > index 5c32c6f97..6a763d171 100644
> > --- a/lib/tc.c
> > +++ b/lib/tc.c
> > @@ -237,11 +237,34 @@ static void request_from_tcf_id(struct tcf_id *id,
> uint16_t eth_type,
> >      }
> >  }
> >
> > +/* The `tc_transact` function is a wrapper around `nl_transact` with the
> > + * addition of:
> > + *
> > + * 1. the 'request' ofpbuf buffer is freed after `nl_transact` returns,
> > + *    regardless of success or failure.
> > + *
> > + * 2. When a 'replyp' pointer is provided; in the event of the kernel
> > + *    providing an empty response, a positive error return will be
> provided
> > + *    with the value of EPROTO.
> > + *
> > + * Please acquaint yourself with the documentation of the `nl_transact`
> > + * function in the netlink-socket module before making use of this
> function.
> > + */
> >  int
> >  tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
> >  {
> >      int error = nl_transact(NETLINK_ROUTE, request, replyp);
> >      ofpbuf_uninit(request);
> > +
> > +    if (!error && replyp && *replyp && !(*replyp)->size) {
> > +        /* We replicate the behavior of `nl_transact` for error
> conditions and
> > +         * free any allocations before setting the 'replyp' buffer to
> NULL. */
> > +        ofpbuf_delete(*replyp);
> > +        *replyp = NULL;
> > +
> > +        return EPROTO;
>
> You didn't add a log message here and its probably fine.  But maybe we can
> add a coverage counter to at least know that the error condition happened?
> Somehting like 'tc_malformed_netlink_reply' ?  We could use the same
> counter
> also in all the cases below.
>

Not adding log in this specific location was deliberate as some consumers
(tc_query_qdisc()) squelch errors from here and as such logging something
here would cause the ERR/WARN check in tests to trigger.

For the other places I concluded that the error would be logged somewhere
else.

What do you think?
>

Adding a coverage counter is fine and the proposed name is good. I did
almost carry that over from the previous approach, but I found that there
are lots of places where we return EPROTO, so the patch became "noisy" and
hard to backport.

Asking to avoid potential respin: Would it make sense to only add the
coverage counter those places the patch converts to using ofpbuf_try_pull()
+ return EPROTO, and not everywhere else EPROTO is returned on the back of
other checks in TC related modules?

--
Frode Nordahl


> > +    }
> > +
> >      return error;
> >  }
> >
> > @@ -2190,18 +2213,18 @@ int
> >  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
> >                             struct tc_flower *flower, bool terse)
> >  {
> > -    struct tcmsg *tc;
> > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data,
> reply->size);
> > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> >      struct nlattr *ta[ARRAY_SIZE(tca_policy)];
> >      const char *kind;
> >
> > -    if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
> > +    if (!nlmsg || !tc) {
> >          return EPROTO;
> >      }
> >
> >      memset(flower, 0, sizeof *flower);
> >
> > -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> > -
> >      flower->key.eth_type = (OVS_FORCE ovs_be16)
> tc_get_minor(tc->tcm_info);
> >      flower->mask.eth_type = OVS_BE16_MAX;
> >      id->prio = tc_get_major(tc->tcm_info);
> > @@ -2215,8 +2238,7 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply,
> struct tcf_id *id,
> >          return EAGAIN;
> >      }
> >
> > -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> > -                         tca_policy, ta, ARRAY_SIZE(ta))) {
> > +    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
> >          VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
> >          return EPROTO;
> >      }
> > @@ -2237,13 +2259,16 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply,
> struct tcf_id *id,
> >  int
> >  parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
> >  {
> > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data,
> reply->size);
> > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> >      struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
> > -    struct tcmsg *tc;
> >
> > -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> > +    if (!nlmsg || !tc) {
> > +        return EPROTO;
> > +    }
> >
> > -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> > -                         tca_chain_policy, ta, ARRAY_SIZE(ta))) {
> > +    if (!nl_policy_parse(&b, 0, tca_chain_policy, ta, ARRAY_SIZE(ta))) {
> >          VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy");
> >          return EINVAL;
> >      }
> > @@ -2307,21 +2332,26 @@ int
> >  parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t police_idx[])
> >  {
> >      static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO] =
> {};
> > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data,
> reply->size);
> >      struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
> > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> >      const int max_size = ARRAY_SIZE(actions_orders_policy);
> > +    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
> >      const struct nlattr *actions;
> >      struct tc_flower flower;
> > -    struct tcamsg *tca;
> >      int i, cnt = 0;
> >      int err;
> >
> > +    if (!nlmsg || !tca) {
> > +        return EPROTO;
> > +    }
> > +
> >      for (i = 0; i < max_size; i++) {
> >          actions_orders_policy[i].type = NL_A_NESTED;
> >          actions_orders_policy[i].optional = true;
> >      }
> >
> > -    tca = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tca);
> > -    actions = nl_attr_find(reply, NLMSG_HDRLEN + sizeof *tca,
> TCA_ACT_TAB);
> > +    actions = nl_attr_find(&b, 0, TCA_ACT_TAB);
> >      if (!actions || !nl_parse_nested(actions, actions_orders_policy,
> >                                       actions_orders, max_size)) {
> >          VLOG_ERR_RL(&error_rl,
> > @@ -3823,8 +3853,13 @@ tc_replace_flower(struct tcf_id *id, struct
> tc_flower *flower)
> >
> >      error = tc_transact(&request, &reply);
> >      if (!error) {
> > -        struct tcmsg *tc =
> > -            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> > +        struct ofpbuf b = ofpbuf_const_initializer(reply->data,
> reply->size);
> > +        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> > +        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> > +
> > +        if (!nlmsg || !tc) {
> > +            return EPROTO;
> > +        }
> >
> >          id->prio = tc_get_major(tc->tcm_info);
> >          id->handle = tc->tcm_handle;
>
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to