We ran across a problem at a customer's site where the qp array in the mcg entry was filling up and denying further qp attaches. In addition, the upcoming SRIOV support is expected to increase use of this array as well for unicast address steering (or so stated the comments in Yevgeny's original patch, which statically increased the size of the mcg entries). But, since increasing the size of this to an arbitrarily large number is just a waste of memory, and since we don't know that 0x200 will be large enough for all use cases, make the mcg entry size a module parameter instead. This has been tested at our customer's site and solves their problem.
commit ff608ce370b49d2e5b614ff91f4e23b5deaac8a4 Author: Doug Ledford <dledf...@redhat.com> Date: Wed Mar 23 12:20:47 2011 -0400 mlx4: make the size of the mcg entry a module parameter Testing showed that the default size of 0x100 could be overrun. Bumping to 0x200 would fix the problem, but only until we hit the cap again, and at the expense of making memory consumption in all scenarios worse. So, make the size of the mcg entry a module parameter and let those people who need to bump the size do so. Signed-off-by: Doug Ledford <dledf...@redhat.com> diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index c7a6213..322f0af 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -158,7 +158,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->masked_atomic_cap = IB_ATOMIC_HCA; props->max_pkeys = dev->dev->caps.pkey_table_len[1]; props->max_mcast_grp = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms; - props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm; + props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mcg; props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = (1 << (32 - ilog2(dev->dev->caps.num_mpts))) - 1; diff --git a/drivers/net/mlx4/fw.c b/drivers/net/mlx4/fw.c index 5de1db8..800cb2d 100644 --- a/drivers/net/mlx4/fw.c +++ b/drivers/net/mlx4/fw.c @@ -434,8 +434,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->reserved_mrws, dev_cap->reserved_mtts); mlx4_dbg(dev, "Max PDs: %d, reserved PDs: %d, reserved UARs: %d\n", dev_cap->max_pds, dev_cap->reserved_pds, dev_cap->reserved_uars); - mlx4_dbg(dev, "Max QP/MCG: %d, reserved MGMs: %d\n", - dev_cap->max_pds, dev_cap->reserved_mgms); + mlx4_dbg(dev, "Max QP/MCG: %d, Max MCGs: %d, reserved MGMs: %d\n", + dev_cap->max_qp_per_mcg, dev_cap->max_mcgs, dev_cap->reserved_mgms); mlx4_dbg(dev, "Max CQEs: %d, max WQEs: %d, max SRQ WQEs: 
%d\n", dev_cap->max_cq_sz, dev_cap->max_qp_sz, dev_cap->max_srq_sz); mlx4_dbg(dev, "Local CA ACK delay: %d, max MTU: %d, port width cap: %d\n", diff --git a/drivers/net/mlx4/main.c b/drivers/net/mlx4/main.c index 2765a3c..5306141 100644 --- a/drivers/net/mlx4/main.c +++ b/drivers/net/mlx4/main.c @@ -101,6 +101,10 @@ module_param_named(use_prio, use_prio, bool, 0444); MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports " "(0/1, default 0)"); +static int log_mcg_size = 8; +module_param_named(log_mcg_size, log_mcg_size, int, 0444); +MODULE_PARM_DESC(log_mcg_size, "Log2 size of MCG struct (8-11)"); + static int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG); module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444); MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)"); @@ -203,7 +207,14 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_srqs = dev_cap->reserved_srqs; dev->caps.max_sq_desc_sz = dev_cap->max_sq_desc_sz; dev->caps.max_rq_desc_sz = dev_cap->max_rq_desc_sz; - dev->caps.num_qp_per_mgm = MLX4_QP_PER_MGM; + dev->caps.max_qp_per_mcg = dev_cap->max_qp_per_mcg; + dev->caps.max_mcgs = dev_cap->max_mcgs; + i = 0; + do { + dev->caps.mcg_entry_size = 1 << (log_mcg_size - i++); + dev->caps.num_qp_per_mcg = 4 * (dev->caps.mcg_entry_size / 16 - 2); + } while (dev->caps.num_qp_per_mcg > dev->caps.max_qp_per_mcg); + /* * Subtract 1 from the limit because we need to allocate a * spare CQE so the HCA HW can tell the difference between an @@ -642,7 +653,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap, * and it's a lot easier than trying to track ref counts. 
*/ err = mlx4_init_icm_table(dev, &priv->mcg_table.table, - init_hca->mc_base, MLX4_MGM_ENTRY_SIZE, + init_hca->mc_base, dev->caps.mcg_entry_size, dev->caps.num_mgms + dev->caps.num_amgms, dev->caps.num_mgms + dev->caps.num_amgms, 0, 0); @@ -1330,6 +1341,11 @@ static int __init mlx4_verify_params(void) return -1; } + if ((log_mcg_size < 8) || (log_mcg_size > 11)) { + printk(KERN_WARNING "mlx4_core: bad log_mcg_size: %d\n", log_mcg_size); + return -1; + } + return 0; } diff --git a/drivers/net/mlx4/mcg.c b/drivers/net/mlx4/mcg.c index 79cf42d..70e2ba6 100644 --- a/drivers/net/mlx4/mcg.c +++ b/drivers/net/mlx4/mcg.c @@ -45,7 +45,7 @@ struct mlx4_mgm { __be32 members_count; u32 reserved[2]; u8 gid[16]; - __be32 qp[MLX4_QP_PER_MGM]; + __be32 qp[MLX4_MAX_QP_PER_MGM]; }; static const u8 zero_gid[16]; /* automatically initialized to 0 */ @@ -185,12 +185,12 @@ int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16], } index += dev->caps.num_mgms; - memset(mgm, 0, sizeof *mgm); + memset(mgm, 0, dev->caps.mcg_entry_size); memcpy(mgm->gid, gid, 16); } members_count = be32_to_cpu(mgm->members_count) & 0xffffff; - if (members_count == MLX4_QP_PER_MGM) { + if (members_count == dev->caps.num_qp_per_mcg) { mlx4_err(dev, "MGM at index %x is full.\n", index); err = -ENOMEM; goto out; diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h index 0da5bb72..1818d84 100644 --- a/drivers/net/mlx4/mlx4.h +++ b/drivers/net/mlx4/mlx4.h @@ -58,8 +58,8 @@ enum { }; enum { - MLX4_MGM_ENTRY_SIZE = 0x100, - MLX4_QP_PER_MGM = 4 * (MLX4_MGM_ENTRY_SIZE / 16 - 2), + MLX4_MAX_MGM_ENTRY_SIZE = 1 << 11, + MLX4_MAX_QP_PER_MGM = 4 * (MLX4_MAX_MGM_ENTRY_SIZE / 16 - 2), MLX4_MTT_ENTRY_PER_SEG = 8 }; diff --git a/drivers/net/mlx4/profile.c b/drivers/net/mlx4/profile.c index e749f82..2f19831 100644 --- a/drivers/net/mlx4/profile.c +++ b/drivers/net/mlx4/profile.c @@ -99,7 +99,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, profile[MLX4_RES_DMPT].size = dev_cap->dmpt_entry_sz; 
profile[MLX4_RES_CMPT].size = dev_cap->cmpt_entry_sz; profile[MLX4_RES_MTT].size = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz; - profile[MLX4_RES_MCG].size = MLX4_MGM_ENTRY_SIZE; + profile[MLX4_RES_MCG].size = dev->caps.mcg_entry_size; profile[MLX4_RES_QP].num = request->num_qp; profile[MLX4_RES_RDMARC].num = request->num_qp * request->rdmarc_per_qp; @@ -220,7 +220,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev, dev->caps.num_mgms = profile[i].num >> 1; dev->caps.num_amgms = profile[i].num >> 1; init_hca->mc_base = profile[i].start; - init_hca->log_mc_entry_sz = ilog2(MLX4_MGM_ENTRY_SIZE); + init_hca->log_mc_entry_sz = ilog2(dev->caps.mcg_entry_size); init_hca->log_mc_table_sz = profile[i].log_num; init_hca->log_mc_hash_sz = profile[i].log_num - 1; break; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 0492146..2a86dbd 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -230,10 +230,13 @@ struct mlx4_caps { int reserved_mtts; int reserved_mrws; int reserved_uars; + int max_qp_per_mcg; + int max_mcgs; + int mcg_entry_size; + int num_qp_per_mcg; int num_mgms; int num_amgms; int reserved_mcgs; - int num_qp_per_mgm; int num_pds; int reserved_pds; int mtt_entry_sz; -- Doug Ledford <dledf...@redhat.com> GPG KeyID: CFBFF194 http://people.redhat.com/dledford Infiniband specific RPMs available at http://people.redhat.com/dledford/Infiniband
signature.asc
Description: This is a digitally signed message part