On 16:48 Tue 11 May , Ira Weiny wrote:
>
> From: Ira Weiny <[email protected]>
> Date: Tue, 11 May 2010 15:36:08 -0700
> Subject: [PATCH] ibnetdisc: Separate calls to umad and mad layer to avoid
> race condition on response MAD's
>
> Specify CA/Port to use which allows parallel scanning to other
> operations.
>
> Signed-off-by: Ira Weiny <[email protected]>
Applied. Thanks.
However see a minor comment below.
> ---
> .../libibnetdisc/include/infiniband/ibnetdisc.h | 15 ++--
> infiniband-diags/libibnetdisc/src/ibnetdisc.c | 52 +++++++-----
> infiniband-diags/libibnetdisc/src/internal.h | 11 ++-
> infiniband-diags/libibnetdisc/src/query_smp.c | 83
> ++++++++++++++++----
> infiniband-diags/libibnetdisc/test/testleaks.c | 16 +---
> infiniband-diags/src/iblinkinfo.c | 8 +-
> infiniband-diags/src/ibnetdiscover.c | 14 +---
> infiniband-diags/src/ibqueryerrors.c | 11 ++-
> 8 files changed, 134 insertions(+), 76 deletions(-)
[snip]
> diff --git a/infiniband-diags/libibnetdisc/src/internal.h
> b/infiniband-diags/libibnetdisc/src/internal.h
> index 2cfde02..d037a60 100644
> --- a/infiniband-diags/libibnetdisc/src/internal.h
> +++ b/infiniband-diags/libibnetdisc/src/internal.h
> @@ -54,6 +54,8 @@
> #define MAXHOPS 63
>
> #define DEFAULT_MAX_SMP_ON_WIRE 2
> +#define DEFAULT_TIMEOUT 1000
> +#define DEFAULT_RETRIES 3
>
> typedef struct ibnd_scan {
> ib_portid_t selfportid;
> @@ -76,16 +78,19 @@ struct ibnd_smp {
>
> struct smp_engine {
> struct ibmad_port *ibmad_port;
After this patch ibmad_port is not used by smp_engine, but instead by an
"upper" layer (ibnetdisc). So I would suggest to move this there (for
example to be a member of scan struct).
Sasha
> + int umad_fd;
> + int smi_agent;
> + int smi_dir_agent;
> ibnd_smp_t *smp_queue_head;
> ibnd_smp_t *smp_queue_tail;
> void *user_data;
> cl_qmap_t smps_on_wire;
> - int max_smps_on_wire;
> + struct ibnd_config *cfg;
> unsigned total_smps;
> };
>
> -void smp_engine_init(smp_engine_t * engine, struct ibmad_port *ibmad_port,
> - void *user_data, int max_smps_on_wire);
> +int smp_engine_init(smp_engine_t * engine, char * ca_name, int ca_port,
> + void *user_data, ibnd_config_t *cfg);
> int issue_smp(smp_engine_t * engine, ib_portid_t * portid,
> unsigned attrid, unsigned mod, smp_comp_cb_t cb, void *cb_data);
> int process_mads(smp_engine_t * engine);
> diff --git a/infiniband-diags/libibnetdisc/src/query_smp.c
> b/infiniband-diags/libibnetdisc/src/query_smp.c
> index 7234844..4dbfa0d 100644
> --- a/infiniband-diags/libibnetdisc/src/query_smp.c
> +++ b/infiniband-diags/libibnetdisc/src/query_smp.c
> @@ -61,25 +61,32 @@ static ibnd_smp_t *get_smp(smp_engine_t * engine)
> return rc;
> }
>
> -static int send_smp(ibnd_smp_t * smp, struct ibmad_port *srcport)
> +static int send_smp(ibnd_smp_t * smp, smp_engine_t * engine)
> {
> int rc = 0;
> uint8_t umad[1024];
> ib_rpc_t *rpc = &smp->rpc;
> + int agent = 0;
>
> memset(umad, 0, umad_size() + IB_MAD_SIZE);
>
> + if (rpc->mgtclass == IB_SMI_CLASS) {
> + agent = engine->smi_agent;
> + } else if (rpc->mgtclass == IB_SMI_DIRECT_CLASS) {
> + agent = engine->smi_dir_agent;
> + } else {
> + IBND_ERROR("Invalid class for RPC\n");
> + return (-EIO);
> + }
> +
> if ((rc = mad_build_pkt(umad, &smp->rpc, &smp->path, NULL, NULL))
> < 0) {
> IBND_ERROR("mad_build_pkt failed; %d\n", rc);
> return rc;
> }
>
> - if ((rc = umad_send(mad_rpc_portid(srcport),
> - mad_rpc_class_agent(srcport, rpc->mgtclass),
> - umad, IB_MAD_SIZE,
> - mad_get_timeout(srcport, rpc->timeout),
> - mad_get_retries(srcport))) < 0) {
> + if ((rc = umad_send(engine->umad_fd, agent, umad, IB_MAD_SIZE,
> + engine->cfg->timeout_ms, engine->cfg->retries)) <
> 0) {
> IBND_ERROR("send failed; %d\n", rc);
> return rc;
> }
> @@ -91,12 +98,13 @@ static int process_smp_queue(smp_engine_t * engine)
> {
> int rc = 0;
> ibnd_smp_t *smp;
> - while (cl_qmap_count(&engine->smps_on_wire) < engine->max_smps_on_wire)
> {
> + while (cl_qmap_count(&engine->smps_on_wire)
> + < engine->cfg->max_smps) {
> smp = get_smp(engine);
> if (!smp)
> return 0;
>
> - if ((rc = send_smp(smp, engine->ibmad_port)) != 0) {
> + if ((rc = send_smp(smp, engine)) != 0) {
> free(smp);
> return rc;
> }
> @@ -122,7 +130,7 @@ int issue_smp(smp_engine_t * engine, ib_portid_t * portid,
> smp->rpc.method = IB_MAD_METHOD_GET;
> smp->rpc.attr.id = attrid;
> smp->rpc.attr.mod = mod;
> - smp->rpc.timeout = mad_get_timeout(engine->ibmad_port, 0);
> + smp->rpc.timeout = engine->cfg->timeout_ms;
> smp->rpc.datasz = IB_SMP_DATA_SIZE;
> smp->rpc.dataoffs = IB_SMP_DATA_OFFS;
> smp->rpc.trid = mad_trid();
> @@ -153,7 +161,7 @@ static int process_one_recv(smp_engine_t * engine)
> memset(umad, 0, sizeof(umad));
>
> /* wait for the next message */
> - if ((rc = umad_recv(mad_rpc_portid(engine->ibmad_port), umad, &length,
> + if ((rc = umad_recv(engine->umad_fd, umad, &length,
> 0)) < 0) {
> if (rc == -EWOULDBLOCK)
> return 0;
> @@ -190,14 +198,58 @@ error:
> return rc;
> }
>
> -void smp_engine_init(smp_engine_t * engine, struct ibmad_port *ibmad_port,
> - void *user_data, int max_smps_on_wire)
> +int smp_engine_init(smp_engine_t * engine, char * ca_name, int ca_port,
> + void *user_data, ibnd_config_t *cfg)
> {
> + int nc = 2;
> + int mc[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS };
> +
> memset(engine, 0, sizeof(*engine));
> - engine->ibmad_port = ibmad_port;
> +
> + engine->ibmad_port = mad_rpc_open_port(ca_name, ca_port, mc, nc);
> + if (!engine->ibmad_port) {
> + IBND_ERROR("can't open MAD port (%s:%d)\n", ca_name, ca_port);
> + return -EIO;
> + }
> + mad_rpc_set_timeout(engine->ibmad_port, cfg->timeout_ms);
> + mad_rpc_set_retries(engine->ibmad_port, cfg->retries);
> +
> + if (umad_init() < 0) {
> + IBND_ERROR("umad_init failed\n");
> + mad_rpc_close_port(engine->ibmad_port);
> + return -EIO;
> + }
> +
> + engine->umad_fd = umad_open_port(ca_name, ca_port);
> + if (engine->umad_fd < 0) {
> + IBND_ERROR("can't open UMAD port (%s:%d)\n", ca_name, ca_port);
> + mad_rpc_close_port(engine->ibmad_port);
> + return -EIO;
> + }
> +
> + if ((engine->smi_agent = umad_register(engine->umad_fd,
> + IB_SMI_CLASS, 1, 0, 0)) < 0) {
> + IBND_ERROR("Failed to register SMI agent on (%s:%d)\n",
> + ca_name, ca_port);
> + goto eio_close;
> + }
> +
> + if ((engine->smi_dir_agent = umad_register(engine->umad_fd,
> + IB_SMI_DIRECT_CLASS, 1, 0, 0)) < 0) {
> + IBND_ERROR("Failed to register SMI_DIRECT agent on (%s:%d)\n",
> + ca_name, ca_port);
> + goto eio_close;
> + }
> +
> engine->user_data = user_data;
> cl_qmap_init(&engine->smps_on_wire);
> - engine->max_smps_on_wire = max_smps_on_wire;
> + engine->cfg = cfg;
> + return (0);
> +
> +eio_close:
> + mad_rpc_close_port(engine->ibmad_port);
> + umad_close_port(engine->umad_fd);
> + return (-EIO);
> }
>
> void smp_engine_destroy(smp_engine_t * engine)
> @@ -221,6 +273,9 @@ void smp_engine_destroy(smp_engine_t * engine)
> cl_qmap_remove_item(&engine->smps_on_wire, item);
> free(item);
> }
> +
> + umad_close_port(engine->umad_fd);
> + mad_rpc_close_port(engine->ibmad_port);
> }
>
> int process_mads(smp_engine_t * engine)
> diff --git a/infiniband-diags/libibnetdisc/test/testleaks.c
> b/infiniband-diags/libibnetdisc/test/testleaks.c
> index da2fc0a..9a91f50 100644
> --- a/infiniband-diags/libibnetdisc/test/testleaks.c
> +++ b/infiniband-diags/libibnetdisc/test/testleaks.c
> @@ -54,8 +54,6 @@
> char *argv0 = "iblinkinfotest";
> static FILE *f;
>
> -static int timeout_ms = 500;
> -
> void usage(void)
> {
> fprintf(stderr,
> @@ -88,9 +86,6 @@ int main(int argc, char **argv)
> ib_portid_t port_id;
> int iters = -1;
>
> - struct ibmad_port *ibmad_port;
> - int mgmt_classes[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS };
> -
> static char const str_opts[] = "S:D:n:C:P:t:shuf:i:";
> static const struct option long_opts[] = {
> {"S", 1, 0, 'S'},
> @@ -139,7 +134,7 @@ int main(int argc, char **argv)
> iters = (int)strtol(optarg, NULL, 0);
> break;
> case 't':
> - timeout_ms = strtoul(optarg, 0, 0);
> + config.timeout_ms = strtoul(optarg, 0, 0);
> break;
> case 'S':
> guid = (uint64_t) strtoull(optarg, 0, 0);
> @@ -152,15 +147,11 @@ int main(int argc, char **argv)
> argc -= optind;
> argv += optind;
>
> - ibmad_port = mad_rpc_open_port(ca, ca_port, mgmt_classes, 2);
> -
> - mad_rpc_set_timeout(ibmad_port, timeout_ms);
> -
> while (iters == -1 || iters-- > 0) {
> if (from) {
> /* only scan part of the fabric */
> str2drpath(&(port_id.drpath), from, 0, 0);
> - if ((fabric = ibnd_discover_fabric(ibmad_port,
> + if ((fabric = ibnd_discover_fabric(ca, ca_port,
> &port_id, &config))
> == NULL) {
> fprintf(stderr, "discover failed\n");
> @@ -168,7 +159,7 @@ int main(int argc, char **argv)
> goto close_port;
> }
> guid = 0;
> - } else if ((fabric = ibnd_discover_fabric(ibmad_port, NULL,
> + } else if ((fabric = ibnd_discover_fabric(ca, ca_port, NULL,
> &config)) == NULL) {
> fprintf(stderr, "discover failed\n");
> rc = 1;
> @@ -179,6 +170,5 @@ int main(int argc, char **argv)
> }
>
> close_port:
> - mad_rpc_close_port(ibmad_port);
> exit(rc);
> }
> diff --git a/infiniband-diags/src/iblinkinfo.c
> b/infiniband-diags/src/iblinkinfo.c
> index 029573f..d0c9b13 100644
> --- a/infiniband-diags/src/iblinkinfo.c
> +++ b/infiniband-diags/src/iblinkinfo.c
> @@ -337,8 +337,10 @@ int main(int argc, char **argv)
> exit(1);
> }
>
> - if (ibd_timeout)
> + if (ibd_timeout) {
> mad_rpc_set_timeout(ibmad_port, ibd_timeout);
> + config.timeout_ms = ibd_timeout;
> + }
>
> node_name_map = open_node_name_map(node_name_map_file);
>
> @@ -371,12 +373,12 @@ int main(int argc, char **argv)
> } else {
> if (resolved >= 0 &&
> !(fabric =
> - ibnd_discover_fabric(ibmad_port, &port_id, &config)))
> + ibnd_discover_fabric(ibd_ca, ibd_ca_port, &port_id,
> &config)))
> IBWARN("Single node discover failed;"
> " attempting full scan\n");
>
> if (!fabric &&
> - !(fabric = ibnd_discover_fabric(ibmad_port, NULL,
> &config))) {
> + !(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port, NULL,
> &config))) {
> fprintf(stderr, "discover failed\n");
> rc = 1;
> goto close_port;
> diff --git a/infiniband-diags/src/ibnetdiscover.c
> b/infiniband-diags/src/ibnetdiscover.c
> index 57f9625..8f08f06 100644
> --- a/infiniband-diags/src/ibnetdiscover.c
> +++ b/infiniband-diags/src/ibnetdiscover.c
> @@ -67,8 +67,6 @@
> #define DIFF_FLAG_DEFAULT (DIFF_FLAG_SWITCH | DIFF_FLAG_CA |
> DIFF_FLAG_ROUTER \
> | DIFF_FLAG_PORT_CONNECTION)
>
> -struct ibmad_port *srcport;
> -
> static FILE *f;
>
> static char *node_name_map_file = NULL;
> @@ -938,9 +936,6 @@ int main(int argc, char **argv)
> ibnd_fabric_t *fabric = NULL;
> ibnd_fabric_t *diff_fabric = NULL;
>
> - struct ibmad_port *ibmad_port;
> - int mgmt_classes[2] = { IB_SMI_CLASS, IB_SMI_DIRECT_CLASS };
> -
> const struct ibdiag_opt opts[] = {
> {"show", 's', 0, NULL, "show more information"},
> {"list", 'l', 0, NULL, "list of connected nodes"},
> @@ -975,12 +970,8 @@ int main(int argc, char **argv)
> argc -= optind;
> argv += optind;
>
> - ibmad_port = mad_rpc_open_port(ibd_ca, ibd_ca_port, mgmt_classes, 2);
> - if (!ibmad_port)
> - IBERROR("Failed to open %s port %d", ibd_ca, ibd_ca_port);
> -
> if (ibd_timeout)
> - mad_rpc_set_timeout(ibmad_port, ibd_timeout);
> + config.timeout_ms = ibd_timeout;
>
> if (argc && !(f = fopen(argv[0], "w")))
> IBERROR("can't open file %s for writing", argv[0]);
> @@ -996,7 +987,7 @@ int main(int argc, char **argv)
> IBERROR("loading cached fabric failed\n");
> } else {
> if ((fabric =
> - ibnd_discover_fabric(ibmad_port, NULL, &config)) == NULL)
> + ibnd_discover_fabric(ibd_ca, ibd_ca_port, NULL, &config))
> == NULL)
> IBERROR("discover failed\n");
> }
>
> @@ -1017,6 +1008,5 @@ int main(int argc, char **argv)
> if (diff_fabric)
> ibnd_destroy_fabric(diff_fabric);
> close_node_name_map(node_name_map);
> - mad_rpc_close_port(ibmad_port);
> exit(0);
> }
> diff --git a/infiniband-diags/src/ibqueryerrors.c
> b/infiniband-diags/src/ibqueryerrors.c
> index e896254..f04e47f 100644
> --- a/infiniband-diags/src/ibqueryerrors.c
> +++ b/infiniband-diags/src/ibqueryerrors.c
> @@ -600,8 +600,10 @@ int main(int argc, char **argv)
> if (!ibmad_port)
> IBERROR("Failed to open port; %s:%d\n", ibd_ca, ibd_ca_port);
>
> - if (ibd_timeout)
> + if (ibd_timeout) {
> mad_rpc_set_timeout(ibmad_port, ibd_timeout);
> + config.timeout_ms = ibd_timeout;
> + }
>
> node_name_map = open_node_name_map(node_name_map_file);
>
> @@ -633,11 +635,14 @@ int main(int argc, char **argv)
> }
> } else {
> if (resolved >= 0 &&
> - !(fabric = ibnd_discover_fabric(ibmad_port, &portid, 0)))
> + !(fabric = ibnd_discover_fabric(ibd_ca, ibd_ca_port,
> + &portid, &config)))
> IBWARN("Single node discover failed;"
> " attempting full scan");
>
> - if (!fabric && !(fabric = ibnd_discover_fabric(ibmad_port, NULL,
> + if (!fabric && !(fabric = ibnd_discover_fabric(ibd_ca,
> + ibd_ca_port,
> + NULL,
> &config))) {
> fprintf(stderr, "discover failed\n");
> rc = 1;
> --
> 1.5.4.5
>
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html