On 6/5/23 23:45, Frode Nordahl wrote:
> 
> 
> man. 5. jun. 2023, 23:02 skrev Ilya Maximets <[email protected] 
> <mailto:[email protected]>>:
> 
>     On 5/31/23 15:49, Frode Nordahl wrote:
>     > The tc module combines the use of the `tc_transact` helper
>     > function for communication with the in-kernel tc infrastructure
>     > with assertions on the reply data by `ofpbuf_at_assert` on the
>     > received data prior to further processing.
>     >
>     > With the presence of bugs on the kernel side, we need to treat
>     > the kernel as an unreliable service provider and replace assertions
>     > on the reply from it with checks to avoid a fatal crash of OVS.
>     >
>     > For the record, the symptom of the crash is this in the log:
>     > EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <= 
> b->size failed in ofpbuf_at_assert()
>     >
>     > And an excerpt of the backtrace looks like this:
>     > 0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20, 
> b=0x7fb650005d20, offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194
>     > tc_replace_flower (id=<optimized out>, flower=<optimized out>) at 
> ../lib/tc.c:3223
>     > 0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840, 
> match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>,
>     > ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at 
> ../lib/netdev-offload-tc.c:2096
>     > 0x0000561dac117541 in netdev_flow_put (stats=<optimized out>, 
> info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>, 
> actions=<optimized out>,
>     > match=0x7fb65b7ba980, netdev=0x561dacf91840) at 
> ../lib/netdev-offload.c:257
>     > parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at 
> ../lib/dpif-netlink.c:2297
>     > try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at 
> ../lib/dpif-netlink.c:2384
>     >
>     > Reported-At: https://launchpad.net/bugs/2018500 
> <https://launchpad.net/bugs/2018500>
>     > Fixes: f98e418fbdb6 ("tc: Add tc flower functions")
>     > Signed-off-by: Frode Nordahl <[email protected] 
> <mailto:[email protected]>>
>     > ---
>     >  lib/netdev-linux.c      |  7 ++---
>     >  lib/netdev-offload-tc.c |  8 ++++-
>     >  lib/tc.c                | 65 +++++++++++++++++++++++++++++++----------
>     >  3 files changed, 60 insertions(+), 20 deletions(-)
> 
>     Thanks for the update!  This version looks mostly fine to me.
>     See some comments inline.
> 
> 
> Thanks for taking the time to review, I have one question below and will 
> provide an update asap.
> 
>     Best regards, Ilya Maximets.
> 
>     >
>     > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
>     > index 36620199e..0a1a08527 100644
>     > --- a/lib/netdev-linux.c
>     > +++ b/lib/netdev-linux.c
>     > @@ -6235,8 +6235,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>     >       * On Linux 2.6.35+ we use the straightforward method because it 
> allows us
>     >       * to handle non-builtin qdiscs without handle 1:0 (e.g. codel).  
> However,
>     >       * in such a case we get no response at all from the kernel (!) if 
> a
>     > -     * builtin qdisc is in use (which is later caught by "!error &&
>     > -     * !qdisc->size"). */
>     > +     * builtin qdisc is in use (which is later caught by "error == 
> EPROTO" */
>     >      tcmsg = netdev_linux_tc_make_request(netdev_, RTM_GETQDISC, 
> NLM_F_ECHO,
>     >                                           &request);
>     >      if (!tcmsg) {
>     > @@ -6247,7 +6246,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>     > 
>     >      /* Figure out what tc class to instantiate. */
>     >      error = tc_transact(&request, &qdisc);
>     > -    if (!error && qdisc->size) {
>     > +    if (!error) {
>     >          const char *kind;
>     > 
>     >          error = tc_parse_qdisc(qdisc, &kind, NULL);
>     > @@ -6262,7 +6261,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>     >                  ops = &tc_ops_other;
>     >              }
>     >          }
>     > -    } else if ((!error && !qdisc->size) || error == ENOENT) {
>     > +    } else if (error == ENOENT || error == EPROTO) {
>     >          /* Either it's a built-in qdisc, or (on Linux pre-2.6.35) it's 
> a qdisc
>     >           * set up by some other entity that doesn't have a handle 1:0. 
>  We will
>     >           * assume that it's the system default qdisc. */
> 
>     There are 3 more cases of ofpbuf_at_assert() in this file:
>       - tc_update_policer_action_stats
>       - tc_parse_class
>       - tc_add_matchall_policer
> 
>     First two already have a length check before asserting, so changes are not
>     strictly necessary.  The last one though asserts right after tc_transact, 
> so
>     needs fixing.  It doesn't seem to do anything useful with the reply, so 
> maybe
>     we need to just delete that access or not provide the 'reply' pointer in 
> the
>     first place.
> 
> 
> I must have missed those as we changed approach from v4 to v5, sorry about 
> that. Will check and update!
> 
>     > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
>     > index 4f26dd8cc..f89aa3270 100644
>     > --- a/lib/netdev-offload-tc.c
>     > +++ b/lib/netdev-offload-tc.c
>     > @@ -1247,7 +1247,13 @@ parse_tc_flower_to_match(const struct netdev 
> *netdev,
>     >      }
>     >      nl_msg_end_nested(buf, act_off);
>     > 
>     > -    *actions = ofpbuf_at_assert(buf, act_off, sizeof(struct nlattr));
>     > +    struct nlattr *act = ofpbuf_at(buf, act_off, sizeof *act);
>     > +    if (!act) {
>     > +        VLOG_ERR_RL(&error_rl, "failed to parse flower, unexpectedly 
> found no "
>     > +                    "actions.");
>     > +        return -EPROTO;
>     > +    }
> 
>     We can keep an assertion here.  It's not a buffer received from the 
> kernel,
>     it's a buffer we're creating.  Also, nl_msg_end_nested() on a previous 
> line
>     already called the same ofpbuf_at_assert(), so it should not be possible
>     to reach this check.
> 
> 
> Ack
> 
>     > +    *actions = act;
>     > 
>     >      parse_tc_flower_to_stats(flower, stats);
>     >      parse_tc_flower_to_attrs(flower, attrs);
>     > diff --git a/lib/tc.c b/lib/tc.c
>     > index 5c32c6f97..6a763d171 100644
>     > --- a/lib/tc.c
>     > +++ b/lib/tc.c
>     > @@ -237,11 +237,34 @@ static void request_from_tcf_id(struct tcf_id 
> *id, uint16_t eth_type,
>     >      }
>     >  }
>     > 
>     > +/* The `tc_transact` function is a wrapper around `nl_transact` with 
> the
>     > + * addition of:
>     > + *
>     > + * 1. the 'request' ofpbuf buffer is freed after `nl_transact` returns,
>     > + *    regardless of success or failure.
>     > + *
>     > + * 2. When a 'replyp' pointer is provided; in the event of the kernel
>     > + *    providing an empty response, a positive error return will be 
> provided
>     > + *    with the value of EPROTO.
>     > + *
>     > + * Please acquaint yourself with the documentation of the `nl_transact`
>     > + * function in the netlink-socket module before making use of this 
> function.
>     > + */
>     >  int
>     >  tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
>     >  {
>     >      int error = nl_transact(NETLINK_ROUTE, request, replyp);
>     >      ofpbuf_uninit(request);
>     > +
>     > +    if (!error && replyp && *replyp && !(*replyp)->size) {
>     > +        /* We replicate the behavior of `nl_transact` for error 
> conditions and
>     > +         * free any allocations before setting the 'replyp' buffer to 
> NULL. */
>     > +        ofpbuf_delete(*replyp);
>     > +        *replyp = NULL;
>     > +
>     > +        return EPROTO;
> 
>     You didn't add a log message here and its probably fine.  But maybe we can
>     add a coverage counter to at least know that the error condition happened?
>     Somehting like 'tc_malformed_netlink_reply' ?  We could use the same 
> counter
>     also in all the cases below.
> 
> 
> Not adding log in this specific location was deliberate as some consumers 
> (tc_query_qdisc()) squelch errors from here and as such logging something 
> here would cause the ERR/WARN check in tests to trigger.

Hrm.  RTM_GETQDISC indeed doesn't fail and doesn't return anything
if only built-in qdosc is present.  IMHO, that is a horrible design.

I see two ways out of this:

1. Remove error checking from tc_transact() and only check in callers,
   because EPROTO is really not the correct thing to return for
   RTM_GETQDISC, if that is how it "supposed to work".  This way
   tc_query_qdisc() can keep its current checks.

2. Keep the error checking in tc_transact() in this patch and convert
   this oddball tc_query_qdisc() to use nl_transact() directly, since
   it is the only weird interface we have (hopefully).

Both cases are OK from my point of view.

> 
> For the other places I concluded that the error would be logged somewhere 
> else.
> 
>     What do you think?
> 
> 
> Adding a coverage counter is fine and the proposed name is good. I did almost 
> carry that over from the previous approach, but I found that there are lots 
> of places where we return EPROTO, so the patch became "noisy" and hard to 
> backport.
> 
> Asking to avoid potential respin: Would it make sense to only add the 
> coverage counter those places the patch converts to using ofpbuf_try_pull() + 
> return EPROTO, and not everywhere else EPROTO is returned on the back of 
> other checks in TC related modules?

I meant the counter to count only "hard failures", i.e. missing base
netlink header or base tc[a]msg header.  Most of deeper parsing code
already has some form of error messages in place.  So having a counter
only in places you're changing below in lib/tc.c is probably OK for now.

Thinking more about it we may want to have separate counter for the
no-reply case in tc_transact() and another counter for truncated or
broken base headers during the later parsing.  Something like:
 - tc_netlink_malformed_reply
 - tc_netlink_no_reply/missing_reply

Of course, this is only if we go with case #2 above.  Otherwise, the
no-reply counter is not needed as we shouldn't count the RTM_GETQDISC
as error.


> 
> --
> Frode Nordahl 
> 
> 
>     > +    }
>     > +
>     >      return error;
>     >  }
>     > 
>     > @@ -2190,18 +2213,18 @@ int
>     >  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
>     >                             struct tc_flower *flower, bool terse)
>     >  {
>     > -    struct tcmsg *tc;
>     > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, 
> reply->size);
>     > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>     > +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>     >      struct nlattr *ta[ARRAY_SIZE(tca_policy)];
>     >      const char *kind;
>     > 
>     > -    if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
>     > +    if (!nlmsg || !tc) {
>     >          return EPROTO;
>     >      }
>     > 
>     >      memset(flower, 0, sizeof *flower);
>     > 
>     > -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
>     > -
>     >      flower->key.eth_type = (OVS_FORCE ovs_be16) 
> tc_get_minor(tc->tcm_info);
>     >      flower->mask.eth_type = OVS_BE16_MAX;
>     >      id->prio = tc_get_major(tc->tcm_info);
>     > @@ -2215,8 +2238,7 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, 
> struct tcf_id *id,
>     >          return EAGAIN;
>     >      }
>     > 
>     > -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
>     > -                         tca_policy, ta, ARRAY_SIZE(ta))) {
>     > +    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
>     >          VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
>     >          return EPROTO;
>     >      }
>     > @@ -2237,13 +2259,16 @@ parse_netlink_to_tc_flower(struct ofpbuf 
> *reply, struct tcf_id *id,
>     >  int
>     >  parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
>     >  {
>     > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, 
> reply->size);
>     > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>     > +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>     >      struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
>     > -    struct tcmsg *tc;
>     > 
>     > -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
>     > +    if (!nlmsg || !tc) {
>     > +        return EPROTO;
>     > +    }
>     > 
>     > -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
>     > -                         tca_chain_policy, ta, ARRAY_SIZE(ta))) {
>     > +    if (!nl_policy_parse(&b, 0, tca_chain_policy, ta, ARRAY_SIZE(ta))) 
> {
>     >          VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy");
>     >          return EINVAL;
>     >      }
>     > @@ -2307,21 +2332,26 @@ int
>     >  parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t 
> police_idx[])
>     >  {
>     >      static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO] = 
> {};
>     > +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, 
> reply->size);
>     >      struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
>     > +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>     >      const int max_size = ARRAY_SIZE(actions_orders_policy);
>     > +    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
>     >      const struct nlattr *actions;
>     >      struct tc_flower flower;
>     > -    struct tcamsg *tca;
>     >      int i, cnt = 0;
>     >      int err;
>     > 
>     > +    if (!nlmsg || !tca) {
>     > +        return EPROTO;
>     > +    }
>     > +
>     >      for (i = 0; i < max_size; i++) {
>     >          actions_orders_policy[i].type = NL_A_NESTED;
>     >          actions_orders_policy[i].optional = true;
>     >      }
>     > 
>     > -    tca = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tca);
>     > -    actions = nl_attr_find(reply, NLMSG_HDRLEN + sizeof *tca, 
> TCA_ACT_TAB);
>     > +    actions = nl_attr_find(&b, 0, TCA_ACT_TAB);
>     >      if (!actions || !nl_parse_nested(actions, actions_orders_policy,
>     >                                       actions_orders, max_size)) {
>     >          VLOG_ERR_RL(&error_rl,
>     > @@ -3823,8 +3853,13 @@ tc_replace_flower(struct tcf_id *id, struct 
> tc_flower *flower)
>     > 
>     >      error = tc_transact(&request, &reply);
>     >      if (!error) {
>     > -        struct tcmsg *tc =
>     > -            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
>     > +        struct ofpbuf b = ofpbuf_const_initializer(reply->data, 
> reply->size);
>     > +        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>     > +        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>     > +
>     > +        if (!nlmsg || !tc) {
>     > +            return EPROTO;
>     > +        }
>     > 
>     >          id->prio = tc_get_major(tc->tcm_info);
>     >          id->handle = tc->tcm_handle;
> 

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to