I've spent hours trying to fix this commit so that openib would even compile again, but failed; there are just too many errors. Setting aside the need to include <sys/types.h>, <sys/stat.h>, and <unistd.h> to handle the stat call under Linux, there is no function "read_module_param" anywhere, nor is "device" defined in btl_openib_component.c.
Please - a tad more care in what gets committed?? I finally just reverted it so the trunk could build. On Jul 18, 2012, at 10:29 AM, svn-commit-mai...@open-mpi.org wrote: > Author: hjelmn (Nathan Hjelm) > Date: 2012-07-18 13:29:48 EDT (Wed, 18 Jul 2012) > New Revision: 26804 > URL: https://svn.open-mpi.org/trac/ompi/changeset/26804 > > Log: > btl/openib: limit each process to a ppn fraction of the available registered > memory when using mellanox hardware (mlx4 and mthca) > > Text files modified: > trunk/ompi/mca/btl/openib/btl_openib.c | 74 > ++++++++++++++++++++++++++++++++++++++- > trunk/ompi/mca/btl/openib/btl_openib.h | 4 ++ > > trunk/ompi/mca/btl/openib/btl_openib_component.c | 15 ++++++++ > > trunk/ompi/mca/btl/openib/help-mpi-btl-openib.txt | 19 ++++++++++ > > 4 files changed, 110 insertions(+), 2 deletions(-) > > Modified: trunk/ompi/mca/btl/openib/btl_openib.c > ============================================================================== > --- trunk/ompi/mca/btl/openib/btl_openib.c Wed Jul 18 13:29:37 2012 > (r26803) > +++ trunk/ompi/mca/btl/openib/btl_openib.c 2012-07-18 13:29:48 EDT (Wed, > 18 Jul 2012) (r26804) > @@ -70,6 +70,10 @@ > #ifdef HAVE_UNISTD_H > #include <unistd.h> > #endif > +#ifdef OPAL_HAVE_HWLOC > +#include "opal/mca/hwloc/hwloc.h" > +#endif > + > #ifndef MIN > #define MIN(a,b) ((a)<(b)?(a):(b)) > #endif > @@ -579,6 +583,65 @@ > return OMPI_SUCCESS; > } > > +/* calculate memory registation limits */ > +static uint64_t calculate_total_mem (void) > +{ > +#if OPAL_HAVE_HWLOC > + hwloc_obj_t machine; > + > + machine = hwloc_get_next_obj_by_type (opal_hwloc_topology, > HWLOC_OBJ_MACHINE, NULL); > + if (NULL == machine) { > + return 0; > + } > + > + return machine->memory.total_memory; > +#else > + return 0; > +#endif > +} > + > +static uint64_t calculate_max_reg (void) > +{ > + struct stat statinfo; > + uint64_t mtts_per_seg = 1; > + uint64_t num_mtt = 1 << 19; > + uint64_t reserved_mtt = 0; > + uint64_t max_reg, mem_total; > + > + 
mem_total = calculate_total_mem (); > + > + if (0 == stat("/sys/module/mlx4_core/parameters", &statinfo)) { > + mtts_per_seg = 1 << > read_module_param("/sys/module/mlx4_core/parameters/log_mtts_per_seg", 1); > + num_mtt = 1 << > read_module_param("/sys/module/mlx4_core/parameters/log_num_mtt", 20); > + if (1 == num_mtt) { > + /* NTH: is 19 a minimum? when log_num_mtt is set to 0 use 19 */ > + num_mtt = 1 << 20; > + } > + > + max_reg = (num_mtt - reserved_mtt) * getpagesize () * mtts_per_seg; > + } else if (0 == stat("/sys/module/ib_mthca/parameters", &statinfo)) { > + mtts_per_seg = 1 << > read_module_param("/sys/module/ib_mthca/parameters/log_mtts_per_seg", 1); > + num_mtt = > read_module_param("/sys/module/ib_mthca/parameters/num_mtt", 1 << 20); > + reserved_mtt = > read_module_param("/sys/module/ib_mthca/parameters/fmr_reserved_mtts", 0); > + > + max_reg = (num_mtt - reserved_mtt) * getpagesize () * mtts_per_seg; > + } else { > + /* need to update to determine the registration limit for this > configuration */ > + max_reg = mem_total; > + } > + > + /* NTH: print a warning if we can't register more than 75% of physical > memory */ > + if (max_reg < mem_total * 3 / 4) { > + orte_show_help("help-mpi-btl-openib.txt", "reg mem limit low", true, > + orte_process_info.nodename, (unsigned long)(max_reg > >> 20), > + (unsigned long)(mem_total >> 20)); > + } > + > + /* limit us to 87.5% of the registered memory (some fluff for QPs, file > systems, etc) */ > + return (max_reg * 7) >> 3; > +} > + > + > /* > * add a proc to this btl module > * creates an endpoint that is setup on the > @@ -592,7 +655,7 @@ > opal_bitmap_t* reachable) > { > mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*)btl; > - int i,j, rc; > + int i,j, rc, local_procs; > int rem_subnet_id_port_cnt; > int lcl_subnet_id_port_cnt = 0; > int btl_rank = 0; > @@ -621,13 +684,17 @@ > } > #endif > > - for (i = 0; i < (int) nprocs; i++) { > + for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) { > 
struct ompi_proc_t* ompi_proc = ompi_procs[i]; > mca_btl_openib_proc_t* ib_proc; > int remote_matching_port; > > opal_output(-1, "add procs: adding proc %d", i); > > + if (OPAL_PROC_ON_LOCAL_NODE(ompi_proc->proc_flags)) { > + local_procs ++; > + } > + > /* OOB, XOOB, and RDMACM do not support SELF comunication, so > * mark the prco as unreachable by openib btl */ > if (OPAL_EQUAL == orte_util_compare_name_fields > @@ -794,6 +861,9 @@ > peers[i] = endpoint; > } > > + openib_btl->local_procs += local_procs; > + openib_btl->device->mem_reg_max = calculate_max_reg () / > openib_btl->local_procs; > + > return mca_btl_openib_size_queues(openib_btl, nprocs); > } > > > Modified: trunk/ompi/mca/btl/openib/btl_openib.h > ============================================================================== > --- trunk/ompi/mca/btl/openib/btl_openib.h Wed Jul 18 13:29:37 2012 > (r26803) > +++ trunk/ompi/mca/btl/openib/btl_openib.h 2012-07-18 13:29:48 EDT (Wed, > 18 Jul 2012) (r26804) > @@ -390,6 +390,8 @@ > mca_btl_openib_device_qp_t *qps; > /* Maximum value supported by this device for max_inline_data */ > uint32_t max_inline_data; > + /* Registration limit and current count */ > + uint64_t mem_reg_max, mem_reg_active; > } mca_btl_openib_device_t; > OBJ_CLASS_DECLARATION(mca_btl_openib_device_t); > > @@ -467,6 +469,8 @@ > mca_btl_base_module_error_cb_fn_t error_cb; /**< error handler */ > > mca_btl_openib_module_qp_t * qps; > + > + int local_procs; /** number of local procs */ > }; > typedef struct mca_btl_openib_module_t mca_btl_openib_module_t; > > > Modified: trunk/ompi/mca/btl/openib/btl_openib_component.c > ============================================================================== > --- trunk/ompi/mca/btl/openib/btl_openib_component.c Wed Jul 18 13:29:37 > 2012 (r26803) > +++ trunk/ompi/mca/btl/openib/btl_openib_component.c 2012-07-18 13:29:48 EDT > (Wed, 18 Jul 2012) (r26804) > @@ -596,6 +596,13 @@ > enum ibv_access_flags access_flag = (enum ibv_access_flags) > 
(IBV_ACCESS_LOCAL_WRITE | > IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ); > > + if (device->mem_reg_max && > + device->mem_reg_max < (device->mem_reg_active + size)) { > + return OMPI_ERR_OUT_OF_RESOURCE; > + } > + > + device->mem_reg_active += size; > + > #if HAVE_DECL_IBV_ACCESS_SO > if (reg->flags & MCA_MPOOL_FLAGS_SO_MEM) { > access_flag |= IBV_ACCESS_SO; > @@ -637,6 +644,9 @@ > #endif > > } > + > + device->mem_reg_active -= (uint64_t) (reg->bound - reg->base + 1); > + > openib_reg->mr = NULL; > return OMPI_SUCCESS; > } > @@ -818,6 +828,7 @@ > > openib_btl->cpcs = NULL; > openib_btl->num_cpcs = 0; > + openib_btl->local_procs = 0; > > mca_btl_base_active_message_trigger[MCA_BTL_TAG_IB].cbfunc = > btl_openib_control; > mca_btl_base_active_message_trigger[MCA_BTL_TAG_IB].cbdata = NULL; > @@ -1670,6 +1681,10 @@ > return OMPI_ERR_OUT_OF_RESOURCE; > } > > + device->mem_reg_active = 0; > + /* NTH: set some high default until we know how many local peers we have > */ > + device->mem_reg_max = 1ull << 48; > + > device->ib_dev = ib_dev; > device->ib_dev_context = ibv_open_device(ib_dev); > device->ib_pd = NULL; > > Modified: trunk/ompi/mca/btl/openib/help-mpi-btl-openib.txt > ============================================================================== > --- trunk/ompi/mca/btl/openib/help-mpi-btl-openib.txt Wed Jul 18 13:29:37 > 2012 (r26803) > +++ trunk/ompi/mca/btl/openib/help-mpi-btl-openib.txt 2012-07-18 13:29:48 EDT > (Wed, 18 Jul 2012) (r26804) > @@ -689,3 +689,22 @@ > > Use "ibv_devinfo -v" on the local host to see the GID table of this > device. > +[reg mem limit low] > +WARNING: It appears that your OpenFabrics subsystem is configured to only > +allow registering part of your physical memory. This can cause MPI jobs to > +run with erratic performance, hang, and/or crash. > + > +This may be caused by your OpenFabrics vendor limiting the amount of > +physical memory that can be registered. 
You should investigate the > +relevant Linux kernel module parameters that control how much physical > +memory can be registered, and increase them to allow registering all > +physical memory on your machine. > + > +See this Open MPI FAQ item for more information on these Linux kernel module > +parameters: > + > + http://www.open-mpi.org/faq/?category=openfabrics#ib-locked-pages > + > + Local host: %s > + Registerable memory: %lu MiB > + Total memory: %lu MiB > _______________________________________________ > svn mailing list > s...@open-mpi.org > http://www.open-mpi.org/mailman/listinfo.cgi/svn