Export rdma_set_ib_paths to user space to allow applications to manually set the IB path used for connections. This allows alternative ways for a user space application or library to obtain path record information, including retrieving path information from cached data, avoiding direct interaction with the IB SA. The IB SA is a single, centralized entity that can limit scaling on large clusters running MPI applications.
Signed-off-by: Sean Hefty <[email protected]> --- I'd like to get feedback on this approach with the possibility of merging for 2.6.33. drivers/infiniband/core/ucma.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/rdma/rdma_user_cm.h | 7 +++++-- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 4346a24..1359727 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -42,6 +42,7 @@ #include <rdma/rdma_user_cm.h> #include <rdma/ib_marshall.h> #include <rdma/rdma_cm.h> +#include <rdma/rdma_cm_ib.h> MODULE_AUTHOR("Sean Hefty"); MODULE_DESCRIPTION("RDMA Userspace Connection Manager Access"); @@ -811,6 +812,42 @@ static int ucma_set_option_id(struct ucma_context *ctx, int optname, return ret; } +static int ucma_set_ib_path(struct ucma_context *ctx, + struct ib_user_path_rec *upath, size_t optlen) +{ + struct ib_sa_path_rec sa_path; + struct rdma_cm_event event; + int ret; + + if (optlen != sizeof(*upath)) + return -EINVAL; + + ib_copy_path_rec_from_user(&sa_path, upath); + ret = rdma_set_ib_paths(ctx->cm_id, &sa_path, 1); + if (ret) + return ret; + + memset(&event, 0, sizeof event); + event.event = RDMA_CM_EVENT_ROUTE_RESOLVED; + return ucma_event_handler(ctx->cm_id, &event); +} + +static int ucma_set_option_ib(struct ucma_context *ctx, int optname, + void *optval, size_t optlen) +{ + int ret; + + switch (optname) { + case RDMA_OPTION_IB_PATH: + ret = ucma_set_ib_path(ctx, optval, optlen); + break; + default: + ret = -ENOSYS; + } + + return ret; +} + static int ucma_set_option_level(struct ucma_context *ctx, int level, int optname, void *optval, size_t optlen) { @@ -820,6 +857,9 @@ static int ucma_set_option_level(struct ucma_context *ctx, int level, case RDMA_OPTION_ID: ret = ucma_set_option_id(ctx, optname, optval, optlen); break; + case RDMA_OPTION_IB: + ret = ucma_set_option_ib(ctx, optname, optval, optlen); + break; default: ret = -ENOSYS; } diff --git a/include/rdma/rdma_user_cm.h b/include/rdma/rdma_user_cm.h index c557054..d7829f4 100644 --- a/include/rdma/rdma_user_cm.h +++ b/include/rdma/rdma_user_cm.h @@ -215,12 +215,15 @@ struct rdma_ucm_event_resp { /* Option levels */ enum { - RDMA_OPTION_ID = 0 + RDMA_OPTION_ID = 0, + RDMA_OPTION_IB = 1 }; /* Option details */ enum { - RDMA_OPTION_ID_TOS = 0 + RDMA_OPTION_ID_TOS = 0, + + RDMA_OPTION_IB_PATH = 1 }; struct rdma_ucm_set_option { -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
