On 11/9/2013 5:04 PM, Yann Droneaud wrote:
Le mercredi 28 août 2013 à 15:47 +0300, Matan Barak a écrit :
From: Hadar Hen Zion <[email protected]>
Implement ib_uverbs_create_flow and ib_uverbs_destroy_flow to
support flow steering for user space applications.
Change-Id: I0bc6dbe26f4776d00f95b6200ff43ccb24000e31
Signed-off-by: Hadar Hen Zion <[email protected]>
Signed-off-by: Or Gerlitz <[email protected]>
Signed-off-by: Matan Barak <[email protected]>
Seen this in linux-next as commit
436f2ad05a0b65b1467ddf51bc68171c381bf844
---
drivers/infiniband/core/uverbs.h | 3 +
drivers/infiniband/core/uverbs_cmd.c | 228 +++++++++++++++++++++++++++++++++
drivers/infiniband/core/uverbs_main.c | 13 ++-
include/rdma/ib_verbs.h | 1 +
include/uapi/rdma/ib_user_verbs.h | 89 +++++++++++++-
5 files changed, 332 insertions(+), 2 deletions(-)
diff --git a/drivers/infiniband/core/uverbs_cmd.c
b/drivers/infiniband/core/uverbs_cmd.c
index b105140..2856696 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c
@@ -2597,6 +2599,232 @@ out_put:
return ret ? ret : in_len;
}
+static int kern_spec_to_ib_spec(struct ib_kern_spec *kern_spec,
+ union ib_flow_spec *ib_spec)
+{
+ ib_spec->type = kern_spec->type;
+
+ switch (ib_spec->type) {
+ case IB_FLOW_SPEC_ETH:
+ ib_spec->eth.size = sizeof(struct ib_flow_spec_eth);
+ if (ib_spec->eth.size != kern_spec->eth.size)
+ return -EINVAL;
+ memcpy(&ib_spec->eth.val, &kern_spec->eth.val,
+ sizeof(struct ib_flow_eth_filter));
+ memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask,
+ sizeof(struct ib_flow_eth_filter));
+ break;
+ case IB_FLOW_SPEC_IPV4:
+ ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4);
+ if (ib_spec->ipv4.size != kern_spec->ipv4.size)
+ return -EINVAL;
+ memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val,
+ sizeof(struct ib_flow_ipv4_filter));
+ memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask,
+ sizeof(struct ib_flow_ipv4_filter));
+ break;
+ case IB_FLOW_SPEC_TCP:
+ case IB_FLOW_SPEC_UDP:
+ ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp);
+ if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size)
+ return -EINVAL;
+ memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val,
+ sizeof(struct ib_flow_tcp_udp_filter));
+ memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask,
+ sizeof(struct ib_flow_tcp_udp_filter));
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+ssize_t ib_uverbs_create_flow(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_flow cmd;
+ struct ib_uverbs_create_flow_resp resp;
+ struct ib_uobject *uobj;
+ struct ib_flow *flow_id;
+ struct ib_kern_flow_attr *kern_flow_attr;
+ struct ib_flow_attr *flow_attr;
+ struct ib_qp *qp;
+ int err = 0;
+ void *kern_spec;
+ void *ib_spec;
why void * here, there's a type for them, and the function
kern_spec_to_ib_spec() has a prototype with those types.
We could use struct ib_kern_spec and union ib_flow_spec, but since there
is a lot of pointer arithmetic here, I think void * provides a more
readable code. Since the actual specs are of different size and are
placed one after another, traversing through the specs requires the
actual size rather than the union size.
+ int i;
+ int kern_attr_size;
+
+ if (out_len < sizeof(resp))
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof(cmd)))
+ return -EFAULT;
+
+ if (cmd.comp_mask)
+ return -EINVAL;
+
+ if ((cmd.flow_attr.type == IB_FLOW_ATTR_SNIFFER &&
+ !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
+ return -EPERM;
+
+ if (cmd.flow_attr.num_of_specs < 0 ||
num_of_specs is unsigned ...
Correct, this will be fixed.
+ cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
+ return -EINVAL;
+
+ kern_attr_size = cmd.flow_attr.size - sizeof(cmd) -
+ sizeof(struct ib_uverbs_cmd_hdr_ex);
+
Please, no under/overflow. Check that cmd.flow_attr.size is bigger than
sizeof(cmd) + sizeof(struct ib_uverbs_cmd_hdr_ex) before computing
kern_attr_size.
This introduces code duplication since we check that value and then
compute it. Since this value has an upper and lower limits and both
boundaries are checked, why is this problematic ?
+ if (cmd.flow_attr.size < 0 || cmd.flow_attr.size > in_len ||
just like num_of_specs, it's unsigned !
Correct, this will be removed.
cmd.flow_attr.size is certainly less then in_len, so you probably mean
cmd.flow_attr.size > (in_len - sizeof(cmd) - sizeof(struct
ib_uverbs_cmd_hdr_ex)).
It's not certainly less than in_len, as the user initializes
cmd.flow_attr.size. Currently, our version of libibverbs place the
entire command size on cmd.flow_attr.size.
+ kern_attr_size < 0 || kern_attr_size >
+ (cmd.flow_attr.num_of_specs * sizeof(struct ib_kern_spec)))
+ return -EINVAL;
+
let's make kern_attr_size unsigned.
I think that it's not a good idea since we're iterating until we've got
to num_of_specs or we've got an invalid size in one of the spec
structures. Since we have an upper limit to kern_attr_size, I don't
think it's problematic to keep it as int.
And be stricter: kern_attr_size != cmd.flow_attr.num_of_specs *
sizeof(struct ib_kern_spec) might be welcome.
The structures are TLV, so I can only check upper limit but not the
exact size without traversing them.
+ if (cmd.flow_attr.num_of_specs) {
+ kern_flow_attr = kmalloc(cmd.flow_attr.size, GFP_KERNEL);
+ if (!kern_flow_attr)
+ return -ENOMEM;
+
+ memcpy(kern_flow_attr, &cmd.flow_attr, sizeof(*kern_flow_attr));
might be sizeof(struct ib_kern_spec) as used in the computation.
This only copies the "header" and not the spec themselves.
+ if (copy_from_user(kern_flow_attr + 1, buf + sizeof(cmd),
+ kern_attr_size)) {
This copies the specs.
+ err = -EFAULT;
+ goto err_free_attr;
+ }
I don't feel comfortable with the bounds: previously it was
sizeof(cmd) + sizeof(struct ib_uverbs_cmd_hdr_ex), and here
copy_from_user() is copying starting from buf + sizeof(cmd) ... so where
is the struct ib_uverbs_cmd_hdr_ex ?
The cmd starts straight after ib_uverbs_cmd_hdr_ex, but in_len includes
sizeof(struct ib_uverbs_cmd_hdr_ex). Please take a look at the patch
which adds the extension verbs mechanism.
And I dislike the + 1 throughout this portion of the code: why the first
element is being so different than the others. I didn't dig enough in
the code to understand, but from my point of view, it doesn't fit.
+ kern_spec = kern_flow_attr + 1;
+ ib_spec = flow_attr + 1;
+ for (i = 0; i < flow_attr->num_of_specs && kern_attr_size > 0; i++) {
+ err = kern_spec_to_ib_spec(kern_spec, ib_spec);
+ if (err)
+ goto err_free;
+ flow_attr->size +=
+ ((union ib_flow_spec *)ib_spec)->size;
+ kern_attr_size -= ((struct ib_kern_spec *)kern_spec)->size;
+ kern_spec += ((struct ib_kern_spec *)kern_spec)->size;
+ ib_spec += ((union ib_flow_spec *)ib_spec)->size;
Why use void * for ib_spec and kern_spec if there's a union type for
them ?
Since we use TLV, using void * actually introduces less castings.
diff --git a/include/uapi/rdma/ib_user_verbs.h
b/include/uapi/rdma/ib_user_verbs.h
index 61535aa..0b233c5 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -86,7 +86,9 @@ enum {
+struct ib_kern_spec {
+ union {
+ struct {
+ __u32 type;
+ __u16 size;
+ __u16 reserved;
+ };
+ struct ib_kern_spec_eth eth;
+ struct ib_kern_spec_ipv4 ipv4;
+ struct ib_kern_spec_tcp_udp tcp_udp;
+ };
+};
+
[Aside note: no IPv6 spec ?]
+struct ib_kern_flow_attr {
+ __u32 type;
+ __u16 size;
+ __u16 priority;
+ __u8 num_of_specs;
+ __u8 reserved[2];
+ __u8 port;
+ __u32 flags;
+ /* Following are the optional layers according to user request
+ * struct ib_flow_spec_xxx
+ * struct ib_flow_spec_yyy
+ */
+};
+
+struct ib_uverbs_create_flow {
+ __u32 comp_mask;
Alignment notice: There's a hole here on 64bits system with stricter
alignment rules.
Thanks! Do you recommend adding a u32 reserved field ?
+ __u64 response;
+ __u32 qp_handle;
+ struct ib_kern_flow_attr flow_attr;
+};
+
Sorry I've missed that before it got included in infiniband git tree.
Regards.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html