destroy_flow through uverbs

Matan Barak Tue, 17 Sep 2013 05:37:09 -0700

On 11/9/2013 5:04 PM, Yann Droneaud wrote:

Le mercredi 28 août 2013 à 15:47 +0300, Matan Barak a écrit :

From: Hadar Hen Zion <[email protected]>


Implement ib_uverbs_create_flow and ib_uverbs_destroy_flow to
support flow steering for user space applications.

Change-Id: I0bc6dbe26f4776d00f95b6200ff43ccb24000e31
Signed-off-by: Hadar Hen Zion <[email protected]>
Signed-off-by: Or Gerlitz <[email protected]>
Signed-off-by: Matan Barak <[email protected]>


Seen this in linux-next as commit
436f2ad05a0b65b1467ddf51bc68171c381bf844

---
  drivers/infiniband/core/uverbs.h      |    3 +
  drivers/infiniband/core/uverbs_cmd.c  |  228 +++++++++++++++++++++++++++++++++
  drivers/infiniband/core/uverbs_main.c |   13 ++-
  include/rdma/ib_verbs.h               |    1 +
  include/uapi/rdma/ib_user_verbs.h     |   89 +++++++++++++-
  5 files changed, 332 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/uverbs_cmd.c 
b/drivers/infiniband/core/uverbs_cmd.c
index b105140..2856696 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2597,6 +2599,232 @@ out_put:
        return ret ? ret : in_len;
  }

+static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
+                               union ib_flow_spec *ib_spec)
+{
+       ib_spec->type = kern_spec->type;
+
+       switch (ib_spec->type) {
+       case IB_FLOW_SPEC_ETH:
+               ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
+               if (ib_spec->eth.size != kern_spec->eth.size)
+                       return -EINVAL;
+               memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
+                      sizeof(struct ib_flow_eth_filter));
+               memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
+                      sizeof(struct ib_flow_eth_filter));
+               break;
+       case IB_FLOW_SPEC_IPV4:
+               ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
+               if (ib_spec->ipv4.size != kern_spec->ipv4.size)
+                       return -EINVAL;
+               memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
+                      sizeof(struct ib_flow_ipv4_filter));
+               memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
+                      sizeof(struct ib_flow_ipv4_filter));
+               break;
+       case IB_FLOW_SPEC_TCP:
+       case IB_FLOW_SPEC_UDP:
+               ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
+               if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size)
+                       return -EINVAL;
+               memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
+                      sizeof(struct ib_flow_tcp_udp_filter));
+               memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
+                      sizeof(struct ib_flow_tcp_udp_filter));
+               break;
+       default:
+               return -EINVAL;
+       }
+       return 0;
+}
+
+ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
+                             const char __user *buf, int in_len,
+                             int out_len)
+{
+       struct ib_uverbs_create_flow      cmd;
+       struct ib_uverbs_create_flow_resp resp;
+       struct ib_uobject                 *uobj;
+       struct ib_flow                    *flow_id;
+       struct ib_kern_flow_attr          *kern_flow_attr;
+       struct ib_flow_attr               *flow_attr;
+       struct ib_qp                      *qp;
+       int err = 0;
+       void *kern_spec;
+       void *ib_spec;


why void * here, there's a type for them, and the function
kern_spec_to_ib_spec() has a prototype with those types.

We could use struct ib_kern_spec and union ib_flow_spec, but since thereis a lot of pointer arithmetic here, I think void * provides a morereadable code. Since the actual specs are of different size and areplaced one after another, traversing through the specs requires theactual size rather than the union size.

+       int i;
+       int kern_attr_size;
+
+       if (out_len < sizeof(resp))
+               return -ENOSPC;
+
+       if (copy_from_user(&cmd, buf, sizeof(cmd)))
+               return -EFAULT;
+
+       if (cmd.comp_mask)
+               return -EINVAL;
+
+       if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER &&
+            !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
+               return -EPERM;
+
+       if (cmd.flow_attr.num_of_specs < 0 ||


num_of_specs is unsigned ...


Correct, this will be fixed.

+           cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+               return -EINVAL;
+
+       kern_attr_size = cmd.flow_attr.size - sizeof(cmd) -
+                        sizeof(struct ib_uverbs_cmd_hdr_ex);
+


Please, no under/overflow. Check that cmd.flow_attr.size is bigger than
sizeof(cmd) + sizeof(struct ib_uverbs_cmd_hdr_ex) before computing
kern_attr_size.

This introduces code duplication since we check that value and thencompute it. Since this value has an upper and lower limits and bothboundaries are checked, why is this problematic ?

+       if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len ||


just like num_of_specs, it's unsigned !


Correct, this will be removed.


cmd.flow_attr.size is certainly less then in_len, so you probably mean
cmd.flow_attr.size > (in_len - sizeof(cmd) - sizeof(struct
ib_uverbs_cmd_hdr_ex)).

It's not certainly less than in_len, as the user initializescmd.flow_attr.size. Currently, our version of libibverbs place theentire command size on cmd.flow_attr.size.

+           kern_attr_size < 0 || kern_attr_size >
+           (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec)))
+               return -EINVAL;
+


let's make kern_attr_size unsigned.

I think that it's not a good idea since we're iterating until we've gotto num_of_specs or we've got an invalid size in one of the specstructures. Since we have an upper limit to kern_attr_size, I don'tthink it's problematic to keep it as int.


And be stricter: kern_attr_size != cmd.flow_attr.num_of_specs *
sizeof(struct ib_kern_spec) might be welcome.

The structures are TLV, so I can only check upper limit but not theexact size without traversing them.

+       if (cmd.flow_attr.num_of_specs) {
+               kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+               if (!kern_flow_attr)
+                       return -ENOMEM;
+
+               memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));


might be sizeof(struct ib_kern_spec) as used in the computation.


This only copies the "header" and not the spec themselves.

+               if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd),
+                                  kern_attr_size)) {


This copies the specs.

+                       err = -EFAULT;
+                       goto err_free_attr;
+               }


I don't feel comfortable with the bounds: previously it was
sizeof(cmd) + sizeof(struct ib_uverbs_cmd_hdr_ex), and here
copy_from_user() is copying starting from buf + sizeof(cmd) ... so where
is the struct ib_uverbs_cmd_hdr_ex ?

The cmd starts straight after ib_uverbs_cmd_hdr_ex, but in_len includessizeof(struct ib_uverbs_cmd_hdr_ex). Please take a look at the patchwhich adds the extension verbs mechanism.


And I dislike the + 1 throughout this portion of the code: why the first
element is being so different than the others. I didn't dig enough in
the code to understand, but from my point of view, it doesn't fit.

+       kern_spec = kern_flow_attr + 1;
+       ib_spec = flow_attr + 1;
+       for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) {
+               err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+               if (err)
+                       goto err_free;
+               flow_attr->size +=
+                       ((union ib_flow_spec *)ib_spec)->size;
+               kern_attr_size -= ((struct ib_kern_spec *)kern_spec)->size;
+               kern_spec += ((struct ib_kern_spec *)kern_spec)->size;
+               ib_spec += ((union ib_flow_spec *)ib_spec)->size;


Why use void * for ib_spec and kern_spec if there's a union type for
them ?


Since we use TLV, using void * actually introduces less castings.

diff --git a/include/uapi/rdma/ib_user_verbs.h 
b/include/uapi/rdma/ib_user_verbs.h
index 61535aa..0b233c5 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -86,7 +86,9 @@ enum {

+struct ib_kern_spec {
+       union {
+               struct {
+                       __u32 type;
+                       __u16 size;
+                       __u16 reserved;
+               };
+               struct ib_kern_spec_eth     eth;
+               struct ib_kern_spec_ipv4    ipv4;
+               struct ib_kern_spec_tcp_udp tcp_udp;
+       };
+};
+


[Aside note: no IPv6 spec ?]

+struct ib_kern_flow_attr {
+       __u32 type;
+       __u16 size;
+       __u16 priority;
+       __u8  num_of_specs;
+       __u8  reserved[2];
+       __u8  port;
+       __u32 flags;
+       /* Following are the optional layers according to user request
+        * struct ib_flow_spec_xxx
+        * struct ib_flow_spec_yyy
+        */
+};
+
+struct ib_uverbs_create_flow  {
+       __u32 comp_mask;


Alignment notice: There's a hole here on 64bits system with stricter
alignment rules.


Thanks! Do you recommend adding a u32 reserved field ?

+       __u64 response;
+       __u32 qp_handle;
+       struct ib_kern_flow_attr flow_attr;
+};
+


Sorry I've missed that before it got included in infiniband git tree.

Regards.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V4 for-next 3/4] IB/core: Export ib_create/destroy_flow through uverbs

Reply via email to