From: Geliang Tang <[email protected]>

To add MPTCP support in "NVMe over TCP", the host side needs to pass IPPROTO_MPTCP to sock_create_kern() instead of IPPROTO_TCP to create an MPTCP socket.
Similar to the target-side nvmet_tcp_proto, this patch defines the host-side nvme_tcp_proto structure, which contains the protocol of the socket and a set of function pointers for socket operations. The only difference is that it defines .set_syncnt instead of .set_reuseaddr. A TCP-specific version of this structure is defined, and a proto field is added to nvme_tcp_ctrl. When the transport string is "tcp", it is assigned to ctrl->proto. All locations that previously called TCP setsockopt functions are updated to call the corresponding function pointers in the nvme_tcp_proto structure. The controller's proto pointer is set during initialization and remains valid throughout the controller's lifetime. v2: - use 'trtype' instead of '--mptcp' (Hannes) v3: - check mptcp protocol from opts->transport instead of passing a parameter (Hannes). v4: - check CONFIG_MPTCP. v5: - define nvme_tcp_proto struct. - add a pointer to this structure in nvme_tcp_ctrl. Cc: Hannes Reinecke <[email protected]> Cc: John Meneghini <[email protected]> Cc: Randy Jennings <[email protected]> Cc: Nilay Shroff <[email protected]> Co-developed-by: zhenwei pi <[email protected]> Signed-off-by: zhenwei pi <[email protected]> Co-developed-by: Hui Zhu <[email protected]> Signed-off-by: Hui Zhu <[email protected]> Co-developed-by: Gang Yan <[email protected]> Signed-off-by: Gang Yan <[email protected]> Signed-off-by: Geliang Tang <[email protected]> --- drivers/nvme/host/tcp.c | 44 ++++++++++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 15d36d6a728e..13a5240623ef 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -182,6 +182,16 @@ struct nvme_tcp_queue { void (*write_space)(struct sock *); }; +struct nvme_tcp_proto { + int protocol; + int (*set_syncnt)(struct sock *sk, int val); + void (*set_nodelay)(struct sock *sk); + void (*no_linger)(struct sock *sk); + void 
(*set_priority)(struct sock *sk, u32 priority); + void (*set_tos)(struct sock *sk, int val); + const struct nvme_ctrl_ops *ops; +}; + struct nvme_tcp_ctrl { /* read only in the hot path */ struct nvme_tcp_queue *queues; @@ -198,6 +208,8 @@ struct nvme_tcp_ctrl { struct delayed_work connect_work; struct nvme_tcp_request async_req; u32 io_queues[HCTX_MAX_TYPES]; + + const struct nvme_tcp_proto *proto; }; static LIST_HEAD(nvme_tcp_ctrl_list); @@ -1799,7 +1811,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, ret = sock_create_kern(current->nsproxy->net_ns, ctrl->addr.ss_family, SOCK_STREAM, - IPPROTO_TCP, &queue->sock); + ctrl->proto->protocol, &queue->sock); if (ret) { dev_err(nctrl->device, "failed to create socket: %d\n", ret); @@ -1816,24 +1828,24 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid, nvme_tcp_reclassify_socket(queue->sock); /* Single syn retry */ - tcp_sock_set_syncnt(queue->sock->sk, 1); + ctrl->proto->set_syncnt(queue->sock->sk, 1); /* Set TCP no delay */ - tcp_sock_set_nodelay(queue->sock->sk); + ctrl->proto->set_nodelay(queue->sock->sk); /* * Cleanup whatever is sitting in the TCP transmit queue on socket * close. This is done to prevent stale data from being sent should * the network connection be restored before TCP times out. 
*/ - sock_no_linger(queue->sock->sk); + ctrl->proto->no_linger(queue->sock->sk); if (so_priority > 0) - sock_set_priority(queue->sock->sk, so_priority); + ctrl->proto->set_priority(queue->sock->sk, so_priority); /* Set socket type of service */ if (nctrl->opts->tos >= 0) - ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos); + ctrl->proto->set_tos(queue->sock->sk, nctrl->opts->tos); /* Set 10 seconds timeout for icresp recvmsg */ queue->sock->sk->sk_rcvtimeo = 10 * HZ; @@ -2900,6 +2912,17 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts) return found; } +static const struct nvme_tcp_proto nvme_tcp_proto = { + .protocol = IPPROTO_TCP, + .set_syncnt = tcp_sock_set_syncnt, + .set_nodelay = tcp_sock_set_nodelay, + .no_linger = sock_no_linger, + .set_priority = sock_set_priority, + .set_tos = ip_sock_set_tos, + .ops = &nvme_tcp_ctrl_ops, + +}; + static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { @@ -2964,13 +2987,20 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev, goto out_free_ctrl; } + if (!strcmp(ctrl->ctrl.opts->transport, "tcp")) { + ctrl->proto = &nvme_tcp_proto; + } else { + ret = -EINVAL; + goto out_free_ctrl; + } + ctrl->queues = kzalloc_objs(*ctrl->queues, ctrl->ctrl.queue_count); if (!ctrl->queues) { ret = -ENOMEM; goto out_free_ctrl; } - ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0); + ret = nvme_init_ctrl(&ctrl->ctrl, dev, ctrl->proto->ops, 0); if (ret) goto out_kfree_queues; -- 2.53.0
