Multi-threaded code can race with ucma_init(). When ucma_init() is called it can set cma_dev_cnt before it is done initializing the library. If a second thread checks cma_dev_cnt and finds it non-zero, then it will skip the call to ucma_init() and assume that the library is ready for use. This can lead to an application crash during startup.
Do not set cma_dev_cnt until the end of ucma_init(), after all initialization has completed. Adjust the error handling code in ucma_init() accordingly. Bug reported by Nir Naaman: Looking at cma.c we see that ucma_init initializes things under a lock but it sets cma_dev_cnt right at the start and many other functions (like rdma_create_event_channel in our case) do not check this variable under a lock. So if two threads call rdma_create_event_channel at the same time one of them will do the initialization but the other may see cma_dev_cnt>0 and continue before the first thread completed the initialization. Signed-off-by: Sean Hefty <[email protected]> --- Nir, can you give this patch a try? If it works for you, I'll push it into my git tree and create a new release. src/cma.c | 40 ++++++++++++++++++++++++---------------- 1 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/cma.c b/src/cma.c index 70dbe1c..87e73ac 100644 --- a/src/cma.c +++ b/src/cma.c @@ -205,30 +205,32 @@ static int ucma_init(void) struct ibv_device **dev_list = NULL; struct cma_device *cma_dev; struct ibv_device_attr attr; - int i, ret; + int i, ret, dev_cnt; pthread_mutex_lock(&mut); - if (cma_dev_cnt) - goto out; + if (cma_dev_cnt) { + pthread_mutex_unlock(&mut); + return 0; + } ret = check_abi_version(); if (ret) - goto err; + goto err1; - dev_list = ibv_get_device_list(&cma_dev_cnt); + dev_list = ibv_get_device_list(&dev_cnt); if (!dev_list) { printf("CMA: unable to get RDMA device list\n"); ret = -ENODEV; - goto err; + goto err1; } - cma_dev_array = malloc(sizeof *cma_dev * cma_dev_cnt); + cma_dev_array = malloc(sizeof *cma_dev * dev_cnt); if (!cma_dev_array) { ret = -ENOMEM; - goto err; + goto err2; } - for (i = 0; dev_list[i]; ++i) { + for (i = 0; dev_list[i];) { cma_dev = &cma_dev_array[i]; cma_dev->guid = ibv_get_device_guid(dev_list[i]); @@ -236,28 +238,34 @@ static int ucma_init(void) if (!cma_dev->verbs) { printf("CMA: unable to open RDMA device\n"); ret = -ENODEV; - goto err; + goto err3; } + i++; ret = ibv_query_device(cma_dev->verbs, &attr); if (ret) { printf("CMA: unable to query RDMA device\n"); - goto err; + goto err3; } cma_dev->port_cnt = attr.phys_port_cnt; cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom; cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom; } -out: + + cma_dev_cnt = dev_cnt; pthread_mutex_unlock(&mut); ibv_free_device_list(dev_list); return 0; -err: - ucma_cleanup(); + +err3: + while (i--) + ibv_close_device(cma_dev_array[i].verbs); + free(cma_dev_array); +err2: + ibv_free_device_list(dev_list); +err1: pthread_mutex_unlock(&mut); - if (dev_list) - ibv_free_device_list(dev_list); return ret; } -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to [email protected] More majordomo info at http://vger.kernel.org/majordomo-info.html
