On 5/31/23 15:49, Frode Nordahl wrote:
> The tc module combines the use of the `tc_transact` helper
> function for communication with the in-kernel tc infrastructure
> with assertions on the reply data by `ofpbuf_at_assert` on the
> received data prior to further processing.
> 
> With the presence of bugs on the kernel side, we need to treat
> the kernel as an unreliable service provider and replace assertions
> on the reply from it with checks to avoid a fatal crash of OVS.
> 
> For the record, the symptom of the crash is this in the log:
> EMER|../include/openvswitch/ofpbuf.h:194: assertion offset + size <= b->size 
> failed in ofpbuf_at_assert()
> 
> And an excerpt of the backtrace looks like this:
> 0x0000561dac1396d1 in ofpbuf_at_assert (b=0x7fb650005d20, b=0x7fb650005d20, 
> offset=16, size=20) at ../include/openvswitch/ofpbuf.h:194
> tc_replace_flower (id=<optimized out>, flower=<optimized out>) at 
> ../lib/tc.c:3223
> 0x0000561dac128155 in netdev_tc_flow_put (netdev=0x561dacf91840, 
> match=<optimized out>, actions=<optimized out>, actions_len=<optimized out>,
> ufid=<optimized out>, info=<optimized out>, stats=<optimized out>) at 
> ../lib/netdev-offload-tc.c:2096
> 0x0000561dac117541 in netdev_flow_put (stats=<optimized out>, 
> info=0x7fb65b7ba780, ufid=<optimized out>, act_len=<optimized out>, 
> actions=<optimized out>,
> match=0x7fb65b7ba980, netdev=0x561dacf91840) at ../lib/netdev-offload.c:257
> parse_flow_put (put=0x7fb65b7bcc50, dpif=0x561dad0ad550) at 
> ../lib/dpif-netlink.c:2297
> try_send_to_netdev (op=0x7fb65b7bcc48, dpif=0x561dad0ad550) at 
> ../lib/dpif-netlink.c:2384
> 
> Reported-At: https://launchpad.net/bugs/2018500
> Fixes: f98e418fbdb6 ("tc: Add tc flower functions")
> Signed-off-by: Frode Nordahl <[email protected]>
> ---
>  lib/netdev-linux.c      |  7 ++---
>  lib/netdev-offload-tc.c |  8 ++++-
>  lib/tc.c                | 65 +++++++++++++++++++++++++++++++----------
>  3 files changed, 60 insertions(+), 20 deletions(-)

Thanks for the update!  This version looks mostly fine to me.
See some comments inline.

Best regards, Ilya Maximets.

> 
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 36620199e..0a1a08527 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -6235,8 +6235,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>       * On Linux 2.6.35+ we use the straightforward method because it allows 
> us
>       * to handle non-builtin qdiscs without handle 1:0 (e.g. codel).  
> However,
>       * in such a case we get no response at all from the kernel (!) if a
> -     * builtin qdisc is in use (which is later caught by "!error &&
> -     * !qdisc->size"). */
> +     * builtin qdisc is in use (which is later caught by "error == EPROTO" */
>      tcmsg = netdev_linux_tc_make_request(netdev_, RTM_GETQDISC, NLM_F_ECHO,
>                                           &request);
>      if (!tcmsg) {
> @@ -6247,7 +6246,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>  
>      /* Figure out what tc class to instantiate. */
>      error = tc_transact(&request, &qdisc);
> -    if (!error && qdisc->size) {
> +    if (!error) {
>          const char *kind;
>  
>          error = tc_parse_qdisc(qdisc, &kind, NULL);
> @@ -6262,7 +6261,7 @@ tc_query_qdisc(const struct netdev *netdev_)
>                  ops = &tc_ops_other;
>              }
>          }
> -    } else if ((!error && !qdisc->size) || error == ENOENT) {
> +    } else if (error == ENOENT || error == EPROTO) {
>          /* Either it's a built-in qdisc, or (on Linux pre-2.6.35) it's a 
> qdisc
>           * set up by some other entity that doesn't have a handle 1:0.  We 
> will
>           * assume that it's the system default qdisc. */

There are 3 more cases of ofpbuf_at_assert() in this file:
  - tc_update_policer_action_stats
  - tc_parse_class
  - tc_add_matchall_policer

First two already have a length check before asserting, so changes are not
strictly necessary.  The last one though asserts right after tc_transact, so
needs fixing.  It doesn't seem to do anything useful with the reply, so maybe
we need to just delete that access or not provide the 'reply' pointer in the
first place.

> diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> index 4f26dd8cc..f89aa3270 100644
> --- a/lib/netdev-offload-tc.c
> +++ b/lib/netdev-offload-tc.c
> @@ -1247,7 +1247,13 @@ parse_tc_flower_to_match(const struct netdev *netdev,
>      }
>      nl_msg_end_nested(buf, act_off);
>  
> -    *actions = ofpbuf_at_assert(buf, act_off, sizeof(struct nlattr));
> +    struct nlattr *act = ofpbuf_at(buf, act_off, sizeof *act);
> +    if (!act) {
> +        VLOG_ERR_RL(&error_rl, "failed to parse flower, unexpectedly found 
> no "
> +                    "actions.");
> +        return -EPROTO;
> +    }

We can keep an assertion here.  It's not a buffer received from the kernel,
it's a buffer we're creating.  Also, nl_msg_end_nested() on a previous line
already called the same ofpbuf_at_assert(), so it should not be possible
to reach this check.

> +    *actions = act;
>  
>      parse_tc_flower_to_stats(flower, stats);
>      parse_tc_flower_to_attrs(flower, attrs);
> diff --git a/lib/tc.c b/lib/tc.c
> index 5c32c6f97..6a763d171 100644
> --- a/lib/tc.c
> +++ b/lib/tc.c
> @@ -237,11 +237,34 @@ static void request_from_tcf_id(struct tcf_id *id, 
> uint16_t eth_type,
>      }
>  }
>  
> +/* The `tc_transact` function is a wrapper around `nl_transact` with the
> + * addition of:
> + *
> + * 1. the 'request' ofpbuf buffer is freed after `nl_transact` returns,
> + *    regardless of success or failure.
> + *
> + * 2. When a 'replyp' pointer is provided; in the event of the kernel
> + *    providing an empty response, a positive error return will be provided
> + *    with the value of EPROTO.
> + *
> + * Please acquaint yourself with the documentation of the `nl_transact`
> + * function in the netlink-socket module before making use of this function.
> + */
>  int
>  tc_transact(struct ofpbuf *request, struct ofpbuf **replyp)
>  {
>      int error = nl_transact(NETLINK_ROUTE, request, replyp);
>      ofpbuf_uninit(request);
> +
> +    if (!error && replyp && *replyp && !(*replyp)->size) {
> +        /* We replicate the behavior of `nl_transact` for error conditions 
> and
> +         * free any allocations before setting the 'replyp' buffer to NULL. 
> */
> +        ofpbuf_delete(*replyp);
> +        *replyp = NULL;
> +
> +        return EPROTO;

You didn't add a log message here and its probably fine.  But maybe we can
add a coverage counter to at least know that the error condition happened?
Somehting like 'tc_malformed_netlink_reply' ?  We could use the same counter
also in all the cases below.

What do you think?


> +    }
> +
>      return error;
>  }
>  
> @@ -2190,18 +2213,18 @@ int
>  parse_netlink_to_tc_flower(struct ofpbuf *reply, struct tcf_id *id,
>                             struct tc_flower *flower, bool terse)
>  {
> -    struct tcmsg *tc;
> +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>      struct nlattr *ta[ARRAY_SIZE(tca_policy)];
>      const char *kind;
>  
> -    if (NLMSG_HDRLEN + sizeof *tc > reply->size) {
> +    if (!nlmsg || !tc) {
>          return EPROTO;
>      }
>  
>      memset(flower, 0, sizeof *flower);
>  
> -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> -
>      flower->key.eth_type = (OVS_FORCE ovs_be16) tc_get_minor(tc->tcm_info);
>      flower->mask.eth_type = OVS_BE16_MAX;
>      id->prio = tc_get_major(tc->tcm_info);
> @@ -2215,8 +2238,7 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, struct 
> tcf_id *id,
>          return EAGAIN;
>      }
>  
> -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> -                         tca_policy, ta, ARRAY_SIZE(ta))) {
> +    if (!nl_policy_parse(&b, 0, tca_policy, ta, ARRAY_SIZE(ta))) {
>          VLOG_ERR_RL(&error_rl, "failed to parse tca policy");
>          return EPROTO;
>      }
> @@ -2237,13 +2259,16 @@ parse_netlink_to_tc_flower(struct ofpbuf *reply, 
> struct tcf_id *id,
>  int
>  parse_netlink_to_tc_chain(struct ofpbuf *reply, uint32_t *chain)
>  {
> +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +    struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
>      struct nlattr *ta[ARRAY_SIZE(tca_chain_policy)];
> -    struct tcmsg *tc;
>  
> -    tc = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> +    if (!nlmsg || !tc) {
> +        return EPROTO;
> +    }
>  
> -    if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *tc,
> -                         tca_chain_policy, ta, ARRAY_SIZE(ta))) {
> +    if (!nl_policy_parse(&b, 0, tca_chain_policy, ta, ARRAY_SIZE(ta))) {
>          VLOG_ERR_RL(&error_rl, "failed to parse tca chain policy");
>          return EINVAL;
>      }
> @@ -2307,21 +2332,26 @@ int
>  parse_netlink_to_tc_policer(struct ofpbuf *reply, uint32_t police_idx[])
>  {
>      static struct nl_policy actions_orders_policy[TCA_ACT_MAX_PRIO] = {};
> +    struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
>      struct nlattr *actions_orders[ARRAY_SIZE(actions_orders_policy)];
> +    struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
>      const int max_size = ARRAY_SIZE(actions_orders_policy);
> +    struct tcamsg *tca = ofpbuf_try_pull(&b, sizeof *tca);
>      const struct nlattr *actions;
>      struct tc_flower flower;
> -    struct tcamsg *tca;
>      int i, cnt = 0;
>      int err;
>  
> +    if (!nlmsg || !tca) {
> +        return EPROTO;
> +    }
> +
>      for (i = 0; i < max_size; i++) {
>          actions_orders_policy[i].type = NL_A_NESTED;
>          actions_orders_policy[i].optional = true;
>      }
>  
> -    tca = ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tca);
> -    actions = nl_attr_find(reply, NLMSG_HDRLEN + sizeof *tca, TCA_ACT_TAB);
> +    actions = nl_attr_find(&b, 0, TCA_ACT_TAB);
>      if (!actions || !nl_parse_nested(actions, actions_orders_policy,
>                                       actions_orders, max_size)) {
>          VLOG_ERR_RL(&error_rl,
> @@ -3823,8 +3853,13 @@ tc_replace_flower(struct tcf_id *id, struct tc_flower 
> *flower)
>  
>      error = tc_transact(&request, &reply);
>      if (!error) {
> -        struct tcmsg *tc =
> -            ofpbuf_at_assert(reply, NLMSG_HDRLEN, sizeof *tc);
> +        struct ofpbuf b = ofpbuf_const_initializer(reply->data, reply->size);
> +        struct nlmsghdr *nlmsg = ofpbuf_try_pull(&b, sizeof *nlmsg);
> +        struct tcmsg *tc = ofpbuf_try_pull(&b, sizeof *tc);
> +
> +        if (!nlmsg || !tc) {
> +            return EPROTO;
> +        }
>  
>          id->prio = tc_get_major(tc->tcm_info);
>          id->handle = tc->tcm_handle;

_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to