Re: [ovs-dev] [PATCH] v2 datapath: More flexible kernel/userspace tunneling attribute.

Jesse Gross Wed, 16 Jan 2013 18:41:59 -0800

On Wed, Jan 16, 2013 at 1:54 PM, Pravin B Shelar <pshe...@nicira.com> wrote:
> diff --git a/datapath/datapath.c b/datapath/datapath.c
> index ed69af8..4ed40e2 100644
> --- a/datapath/datapath.c
> +++ b/datapath/datapath.c
> +static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, int 
> attr_len)
> +{
> +
> +       struct sw_flow_actions *new;
> +       struct nlattr *a;
> +
> +       if (NLA_ALIGN(attr_len) <= (ksize(*sfa) - (*sfa)->actions_len))
> +               goto out;
> +
> +       if (ksize(*sfa) * 2 > MAX_ACTIONS_BUFSIZE)
> +               return ERR_PTR(-EMSGSIZE);


It's possible that the current size is smaller than
MAX_ACTIONS_BUFSIZE but 2 * size is larger.  This probably is not
likely because kmalloc will round up to a power of two and
MAX_ACTIONS_BUFSIZE is a power of two but I'm not sure that we want to
implicitly assume that.

> @@ -716,16 +850,15 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, 
> struct genl_info *info)
>         err = PTR_ERR(acts);
>         if (IS_ERR(acts))
>                 goto err_flow_free;
> +
> +       err = validate_and_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], 
> &flow->key, 0, &acts);
>         rcu_assign_pointer(flow->sf_acts, acts);
> +       if (err)
> +               goto err_flow_free;

I would probably put the error handler before continuing on with the
rcu_assign_pointer call.

> +static int actions_to_attr(const struct nlattr *attr, int len, struct 
> sk_buff *skb)
> +{
> +       const struct nlattr *a;
> +       int rem, err;
> +
> +       nla_for_each_attr(a, attr, len, rem) {
> +               bool skip_copy;
> +               int type = nla_type(a);
> +
> +               skip_copy = false;
> +               switch (type) {
> +               case OVS_ACTION_ATTR_SET:
> +                       err = set_tun_action_to_attr(a, skb, &skip_copy);

The name is a little strange given that we call it unconditionally for
all set actions.

> @@ -951,28 +1179,32 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff 
> *skb, struct genl_info *info)
>
>         /* Validate actions. */
>         if (a[OVS_FLOW_ATTR_ACTIONS]) {
> -               error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0);
> -               if (error)
> +               acts = 
> ovs_flow_actions_alloc(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
> +               error = PTR_ERR(acts);
> +               if (IS_ERR(acts))
>                         goto error;
> +
> +               error = validate_and_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], 
> &key,  0, &acts);
> +               if (error) {
> +                       goto err_kfree;
> +               }

We don't need the braces around this error condition.

>         } else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
>                 error = -EINVAL;
> -               goto error;
> +               goto err_kfree;

I don't we need to call err_kfree in this case because we didn't
actually allocate anything.

> diff --git a/datapath/flow.c b/datapath/flow.c
> index 63eef77..49982f0 100644
> --- a/datapath/flow.c
> +++ b/datapath/flow.c
> +int ipv4_tun_from_nlattr(const struct nlattr *attr,
> +                        struct ovs_key_ipv4_tunnel *tun_key)
> +{
> +       struct nlattr *a;
> +       int rem;
> +
> +       memset(tun_key, 0, sizeof(*tun_key));
> +
> +       nla_for_each_nested(a, attr, rem) {
> +               int type = nla_type(a);
> +               static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_MAX + 1] = {
> +                       [OVS_TUNNEL_ID] = sizeof(u64),
> +                       [OVS_TUNNEL_IPV4_SRC] = sizeof(u32),
> +                       [OVS_TUNNEL_IPV4_DST] = sizeof(u32),
> +                       [OVS_TUNNEL_TOS] = 1,
> +                       [OVS_TUNNEL_TTL] = 1,
> +                       [OVS_TUNNEL_FLAGS_DONT_FRAGMENT] = 0,
> +                       [OVS_TUNNEL_FLAGS_CSUM] = 0,
> +               };
> +
> +               if (type > OVS_TUNNEL_MAX ||
> +                       ovs_tunnel_key_lens[type] != nla_len(a))
> +                       return -EINVAL;
> +
> +               switch (type) {
> +               case OVS_TUNNEL_ID:
> +                       memcpy(&tun_key->tun_id, nla_data(a), sizeof(__be64));
> +                       tun_key->tun_flags |= OVS_TNL_F_KEY;
> +               break;
> +               case OVS_TUNNEL_IPV4_SRC:
> +                       memcpy(&tun_key->ipv4_src, nla_data(a), 
> sizeof(__be32));
> +               break;
> +               case OVS_TUNNEL_IPV4_DST:
> +                       memcpy(&tun_key->ipv4_dst, nla_data(a), 
> sizeof(__be32));

Can't we use nla_get_X for these types?

> +       if (rem > 0)
> +               return -EINVAL;
> +
> +       if (!tun_key->ipv4_dst)
> +               return -EINVAL;
> +
> +       if (!tun_key->ipv4_ttl)
> +               return -EINVAL;

I would distinguish between TTL of zero and not set.  If TTL is zero
is explicitly asked for then I think it's fine to allow but we might
want to create a different default later.

> +int ipv4_tun_to_nlattr(struct sk_buff *skb,
> +                       const struct ovs_key_ipv4_tunnel *tun_key)
> +{
> +       struct nlattr *nla;
> +
> +       nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
> +       if (!nla)
> +               return -EMSGSIZE;
> +
> +       if (tun_key->tun_flags & OVS_TNL_F_KEY &&
> +           nla_put_be64(skb, OVS_TUNNEL_ID, tun_key->tun_id))
> +               return -EMSGSIZE;
> +       if (tun_key->ipv4_src &&
> +           nla_put_be32(skb, OVS_TUNNEL_IPV4_SRC, tun_key->ipv4_src))
> +               return -EMSGSIZE;
> +       if (nla_put_be32(skb, OVS_TUNNEL_IPV4_DST, tun_key->ipv4_dst))
> +               return -EMSGSIZE;
> +       if (tun_key->ipv4_tos &&
> +           nla_put_u8(skb, OVS_TUNNEL_TOS, tun_key->ipv4_tos))
> +               return -EMSGSIZE;
> +       if (tun_key->ipv4_ttl &&
> +           nla_put_u8(skb, OVS_TUNNEL_TTL, tun_key->ipv4_ttl))
> +               return -EMSGSIZE;

I think we should always include TTL in our messages since we are
saying that it is required now.

> diff --git a/datapath/flow.h b/datapath/flow.h
> index 3f3624f..4b43336 100644
> --- a/datapath/flow.h
> +++ b/datapath/flow.h
> +struct ovs_key_ipv4_tunnel {
> +       __be64 tun_id;
> +       __u32  tun_flags;
> +       __be32 ipv4_src;
> +       __be32 ipv4_dst;
> +       __u8   ipv4_tos;
> +       __u8   ipv4_ttl;
> +       __u8   pad[2];
> +};

Is there a need to still keep the pad around?  We could also reduce
tun_flags to a u16 (or even a u8 really).  On 32-bit machines these
two things would reduce the size of the struct.

Also, you could use the non __ type definitions since this is internal
to the kernel.

> diff --git a/datapath/tunnel.h b/datapath/tunnel.h
> index 7705475..809fefd 100644
> --- a/datapath/tunnel.h
> +++ b/datapath/tunnel.h
> @@ -59,6 +59,11 @@
>                       TNL_F_DF_INHERIT | TNL_F_DF_DEFAULT | TNL_F_PMTUD | \
>                       TNL_F_IPSEC)
>
> +/* Tunnel flow flags. */
> +#define OVS_TNL_F_DONT_FRAGMENT                (1 << 0)
> +#define OVS_TNL_F_CSUM                 (1 << 1)
> +#define OVS_TNL_F_KEY                  (1 << 2)

I would probably define these in flow.h with the struct
ovs_key_ipv4_tunnel definition since them seem closely related.

> diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
> index 5e32965..9b4e257 100644
> --- a/include/linux/openvswitch.h
> +++ b/include/linux/openvswitch.h
> +enum ovs_tunnel_attr {
> +       OVS_TUNNEL_ID,          /* be64 Tunnel ID */
> +       OVS_TUNNEL_IPV4_SRC,    /* be32 Tunnel src IP address. */
> +       OVS_TUNNEL_IPV4_DST,    /* be32 Tunnel dst IP address. */
> +       OVS_TUNNEL_TOS,         /* u8 Tunnel IP ToS. */
> +       OVS_TUNNEL_TTL,         /* u8 Tunnel IP TTL. */

I would include ATTR in these names (as in OVS_TUNNEL_ATTR_ID) to
match the other types.

> +       OVS_TUNNEL_FLAGS_DONT_FRAGMENT, /* No argument, flag to set DF. */
> +       OVS_TUNNEL_FLAGS_CSUM,  /* No argument. flag to CSUM packet. */

We probably could drop FLAGS_ from these names to make them a little shorter.

> diff --git a/lib/odp-util.c b/lib/odp-util.c
> index e2f21da..5d7f25a 100644
> --- a/lib/odp-util.c
> +++ b/lib/odp-util.c
> +/* Returns OVS_TNL_* flags. */
> +static enum odp_key_fitness
> +tun_key_from_attr(const struct nlattr *attr, struct flow_tnl *tun)

The comment above this function doesn't look right.

> +{
> +    unsigned int left;
> +    const struct nlattr *a;
> +
> +    NL_NESTED_FOR_EACH(a, left, attr) {
> +        uint16_t type = nl_attr_type(a);
> +        size_t len = nl_attr_get_size(a);
> +        int expected_len = tunnel_key_attr_len(type);
> +
> +        if (len != expected_len && expected_len >= 0) {
> +            return ODP_FIT_ERROR;
> +        }
> +
> +        switch (type) {
> +        case OVS_TUNNEL_ID:
> +            tun->tun_id = nl_attr_get_be64(a);
> +            tun->flags |= FLOW_TNL_F_KEY;
> +        break;
> +        case OVS_TUNNEL_IPV4_SRC:
> +            tun->ip_src = nl_attr_get_be32(a);
> +        break;
> +        case OVS_TUNNEL_IPV4_DST:
> +            tun->ip_dst = nl_attr_get_be32(a);
> +        break;
> +        case OVS_TUNNEL_TOS:
> +            tun->ip_tos = nl_attr_get_u8(a);
> +        break;
> +        case OVS_TUNNEL_TTL:
> +            tun->ip_ttl = nl_attr_get_u8(a);

Should we enforce that TTL is present?

> +        break;
> +        case OVS_TUNNEL_FLAGS_DONT_FRAGMENT:
> +            tun->flags |= FLOW_TNL_F_DONT_FRAGMENT;
> +        break;
> +        case OVS_TUNNEL_FLAGS_CSUM:
> +            tun->flags |= FLOW_TNL_F_CSUM;
> +        break;
> +        default:
> +            return ODP_FIT_TOO_MUCH;

If we get an unknown attribute we should still extract the parts that
we understand since we'll still process the flow.

> +static int
> +tun_key_to_attr(struct ofpbuf *a, const struct flow_tnl *tun_key)
> +{
> +    size_t tun_key_ofs;
> +
> +    tun_key_ofs = nl_msg_start_nested(a, OVS_KEY_ATTR_TUNNEL);
> +
> +    if (tun_key->flags & FLOW_TNL_F_KEY) {
> +        nl_msg_put_be64(a, OVS_TUNNEL_ID, tun_key->tun_id);
> +    }
> +    if (tun_key->ip_src) {
> +        nl_msg_put_be32(a, OVS_TUNNEL_IPV4_SRC, tun_key->ip_src);
> +    }
> +    if (tun_key->ip_dst) {
> +        nl_msg_put_be32(a, OVS_TUNNEL_IPV4_DST, tun_key->ip_dst);
> +    }
> +    if (tun_key->ip_tos) {
> +        nl_msg_put_u8(a, OVS_TUNNEL_TOS, tun_key->ip_tos);
> +    }
> +    if (tun_key->ip_ttl) {
> +        nl_msg_put_u8(a, OVS_TUNNEL_TTL, tun_key->ip_ttl);
> +    } else {
> +        return -EINVAL;

I'm not sure that we need to return an error here.  If the tunnel code
really wants to use a TTL of zero then we should let it for the time
being.  We should just always put the attribute.

> @@ -1905,23 +1962,17 @@ odp_flow_key_to_flow(const struct nlattr *key, size_t 
> key_len,
>          expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TUN_ID;
>      }
>
> -    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_IPV4_TUNNEL)) {
> -        const struct ovs_key_ipv4_tunnel *ipv4_tun_key;
> -
> -        ipv4_tun_key = nl_attr_get(attrs[OVS_KEY_ATTR_IPV4_TUNNEL]);
> -
> -        flow->tunnel.tun_id = ipv4_tun_key->tun_id;
> -        flow->tunnel.ip_src = ipv4_tun_key->ipv4_src;
> -        flow->tunnel.ip_dst = ipv4_tun_key->ipv4_dst;
> -        flow->tunnel.flags = odp_to_flow_flags(ipv4_tun_key->tun_flags);
> -        flow->tunnel.ip_tos = ipv4_tun_key->ipv4_tos;
> -        flow->tunnel.ip_ttl = ipv4_tun_key->ipv4_ttl;
> +    if (present_attrs & (UINT64_C(1) << OVS_KEY_ATTR_TUNNEL)) {
> +        enum odp_key_fitness res;
>
> +        res = tun_key_from_attr(attrs[OVS_KEY_ATTR_TUNNEL], &flow->tunnel);
> +        if (res == ODP_FIT_ERROR) {
> +            return ODP_FIT_ERROR;
> +        } else if (res == ODP_FIT_PERFECT) {
>          /* Allow this to show up as unexpected, if there are unknown flags,
>           * eventually resulting in ODP_FIT_TOO_MUCH.
>           * OVS_TNL_F_KNOWN_MASK defined locally above. */
> -        if (!(ipv4_tun_key->tun_flags & ~OVS_TNL_F_KNOWN_MASK)) {
> -            expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_IPV4_TUNNEL;
> +            expected_attrs |= UINT64_C(1) << OVS_KEY_ATTR_TUNNEL;
>          }

I think comment above this if no longer applies.

Can you also make sure to test this thoroughly since it's so late in
the release cycle?
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Re: [ovs-dev] [PATCH] v2 datapath: More flexible kernel/userspace tunneling attribute.

Reply via email to