I've spent hours trying to fix this commit so that openib would even compile 
again, but failed: there are just too many errors. Setting aside the need to 
include <sys/types.h>, <sys/stat.h>, and <unistd.h> to handle the stat() call 
under Linux, there is no function "read_module_param" anywhere, nor is 
"device" defined in btl_openib_component.c.

Please - a tad more care in what gets committed??

I finally just reverted it so the trunk could build.

On Jul 18, 2012, at 10:29 AM, svn-commit-mai...@open-mpi.org wrote:

> Author: hjelmn (Nathan Hjelm)
> Date: 2012-07-18 13:29:48 EDT (Wed, 18 Jul 2012)
> New Revision: 26804
> URL: https://svn.open-mpi.org/trac/ompi/changeset/26804
> 
> Log:
> btl/openib: limit each process to a ppn fraction of the available registered 
> memory when using mellanox hardware (mlx4 and mthca)
> 
> Text files modified: 
>   trunk/ompi/mca/btl/openib/btl_openib.c            |    74 
> ++++++++++++++++++++++++++++++++++++++- 
>   trunk/ompi/mca/btl/openib/btl_openib.h            |     4 ++                
>                       
>   trunk/ompi/mca/btl/openib/btl_openib_component.c  |    15 ++++++++          
>                       
>   trunk/ompi/mca/btl/openib/help-mpi-btl-openib.txt |    19 ++++++++++        
>                       
>   4 files changed, 110 insertions(+), 2 deletions(-)
> 
> Modified: trunk/ompi/mca/btl/openib/btl_openib.c
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/btl_openib.c    Wed Jul 18 13:29:37 2012        
> (r26803)
> +++ trunk/ompi/mca/btl/openib/btl_openib.c    2012-07-18 13:29:48 EDT (Wed, 
> 18 Jul 2012)      (r26804)
> @@ -70,6 +70,10 @@
> #ifdef HAVE_UNISTD_H
> #include <unistd.h>
> #endif
> +#ifdef OPAL_HAVE_HWLOC
> +#include "opal/mca/hwloc/hwloc.h"
> +#endif
> +
> #ifndef MIN
> #define MIN(a,b) ((a)<(b)?(a):(b))
> #endif
> @@ -579,6 +583,65 @@
>     return OMPI_SUCCESS;
> }
> 
> +/* calculate memory registation limits */
> +static uint64_t calculate_total_mem (void)
> +{
> +#if OPAL_HAVE_HWLOC
> +    hwloc_obj_t machine;
> +
> +    machine = hwloc_get_next_obj_by_type (opal_hwloc_topology, 
> HWLOC_OBJ_MACHINE, NULL);
> +    if (NULL == machine) {
> +        return 0;
> +    }
> +    
> +    return machine->memory.total_memory;
> +#else
> +    return 0;
> +#endif
> +}
> +
> +static uint64_t calculate_max_reg (void) 
> +{
> +    struct stat statinfo;
> +    uint64_t mtts_per_seg = 1;
> +    uint64_t num_mtt = 1 << 19;
> +    uint64_t reserved_mtt = 0;
> +    uint64_t max_reg, mem_total;
> +
> +    mem_total = calculate_total_mem ();
> +
> +    if (0 == stat("/sys/module/mlx4_core/parameters", &statinfo)) {
> +        mtts_per_seg = 1 << 
> read_module_param("/sys/module/mlx4_core/parameters/log_mtts_per_seg", 1);
> +        num_mtt = 1 << 
> read_module_param("/sys/module/mlx4_core/parameters/log_num_mtt", 20);
> +        if (1 == num_mtt) {
> +            /* NTH: is 19 a minimum? when log_num_mtt is set to 0 use 19 */
> +            num_mtt = 1 << 20;
> +        }
> +
> +        max_reg = (num_mtt - reserved_mtt) * getpagesize () * mtts_per_seg;
> +    } else if (0 == stat("/sys/module/ib_mthca/parameters", &statinfo)) {
> +        mtts_per_seg = 1 << 
> read_module_param("/sys/module/ib_mthca/parameters/log_mtts_per_seg", 1);
> +        num_mtt = 
> read_module_param("/sys/module/ib_mthca/parameters/num_mtt", 1 << 20);
> +        reserved_mtt = 
> read_module_param("/sys/module/ib_mthca/parameters/fmr_reserved_mtts", 0);
> +
> +        max_reg = (num_mtt - reserved_mtt) * getpagesize () * mtts_per_seg;
> +    } else {
> +        /* need to update to determine the registration limit for this 
> configuration */
> +        max_reg = mem_total;
> +    }
> +
> +    /* NTH: print a warning if we can't register more than 75% of physical 
> memory */
> +    if (max_reg < mem_total * 3 / 4) {
> +        orte_show_help("help-mpi-btl-openib.txt", "reg mem limit low", true,
> +                       orte_process_info.nodename, (unsigned long)(max_reg 
> >> 20),
> +                       (unsigned long)(mem_total >> 20));
> +    }
> +
> +    /* limit us to 87.5% of the registered memory (some fluff for QPs, file 
> systems, etc) */
> +    return (max_reg * 7) >> 3;
> +}
> +
> +
> /*
>  *  add a proc to this btl module
>  *    creates an endpoint that is setup on the
> @@ -592,7 +655,7 @@
>     opal_bitmap_t* reachable)
> {
>     mca_btl_openib_module_t* openib_btl = (mca_btl_openib_module_t*)btl;
> -    int i,j, rc;
> +    int i,j, rc, local_procs;
>     int rem_subnet_id_port_cnt;
>     int lcl_subnet_id_port_cnt = 0;
>     int btl_rank = 0;
> @@ -621,13 +684,17 @@
>     }
> #endif
> 
> -    for (i = 0; i < (int) nprocs; i++) {
> +    for (i = 0, local_procs = 0 ; i < (int) nprocs; i++) {
>         struct ompi_proc_t* ompi_proc = ompi_procs[i];
>         mca_btl_openib_proc_t* ib_proc;
>         int remote_matching_port;
> 
>         opal_output(-1, "add procs: adding proc %d", i);
> 
> +        if (OPAL_PROC_ON_LOCAL_NODE(ompi_proc->proc_flags)) {
> +            local_procs ++;
> +        }
> +
>         /* OOB, XOOB, and RDMACM do not support SELF comunication, so
>          * mark the prco as unreachable by openib btl  */
>         if (OPAL_EQUAL == orte_util_compare_name_fields
> @@ -794,6 +861,9 @@
>         peers[i] = endpoint;
>     }
> 
> +    openib_btl->local_procs += local_procs;
> +    openib_btl->device->mem_reg_max = calculate_max_reg () / 
> openib_btl->local_procs;
> +
>     return mca_btl_openib_size_queues(openib_btl, nprocs);
> }
> 
> 
> Modified: trunk/ompi/mca/btl/openib/btl_openib.h
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/btl_openib.h    Wed Jul 18 13:29:37 2012        
> (r26803)
> +++ trunk/ompi/mca/btl/openib/btl_openib.h    2012-07-18 13:29:48 EDT (Wed, 
> 18 Jul 2012)      (r26804)
> @@ -390,6 +390,8 @@
>     mca_btl_openib_device_qp_t *qps;
>     /* Maximum value supported by this device for max_inline_data */
>     uint32_t max_inline_data;
> +    /* Registration limit and current count */
> +    uint64_t mem_reg_max, mem_reg_active;
> } mca_btl_openib_device_t;
> OBJ_CLASS_DECLARATION(mca_btl_openib_device_t);
> 
> @@ -467,6 +469,8 @@
>     mca_btl_base_module_error_cb_fn_t error_cb; /**< error handler */
> 
>     mca_btl_openib_module_qp_t * qps;
> +
> +    int local_procs;                   /** number of local procs */
> };
> typedef struct mca_btl_openib_module_t mca_btl_openib_module_t;
> 
> 
> Modified: trunk/ompi/mca/btl/openib/btl_openib_component.c
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/btl_openib_component.c  Wed Jul 18 13:29:37 
> 2012        (r26803)
> +++ trunk/ompi/mca/btl/openib/btl_openib_component.c  2012-07-18 13:29:48 EDT 
> (Wed, 18 Jul 2012)      (r26804)
> @@ -596,6 +596,13 @@
>     enum ibv_access_flags access_flag = (enum ibv_access_flags) 
> (IBV_ACCESS_LOCAL_WRITE |
>         IBV_ACCESS_REMOTE_WRITE | IBV_ACCESS_REMOTE_READ);
> 
> +    if (device->mem_reg_max &&
> +        device->mem_reg_max < (device->mem_reg_active + size)) {
> +        return OMPI_ERR_OUT_OF_RESOURCE;
> +    }
> +
> +    device->mem_reg_active += size;
> +
> #if HAVE_DECL_IBV_ACCESS_SO
>     if (reg->flags & MCA_MPOOL_FLAGS_SO_MEM) {
>         access_flag |= IBV_ACCESS_SO;
> @@ -637,6 +644,9 @@
> #endif
> 
>     }
> +
> +    device->mem_reg_active -= (uint64_t) (reg->bound - reg->base + 1);
> +
>     openib_reg->mr = NULL;
>     return OMPI_SUCCESS;
> }
> @@ -818,6 +828,7 @@
> 
>             openib_btl->cpcs = NULL;
>             openib_btl->num_cpcs = 0;
> +            openib_btl->local_procs = 0;
> 
>             mca_btl_base_active_message_trigger[MCA_BTL_TAG_IB].cbfunc = 
> btl_openib_control;
>             mca_btl_base_active_message_trigger[MCA_BTL_TAG_IB].cbdata = NULL;
> @@ -1670,6 +1681,10 @@
>         return OMPI_ERR_OUT_OF_RESOURCE;
>     }
> 
> +    device->mem_reg_active = 0;
> +    /* NTH: set some high default until we know how many local peers we have 
> */
> +    device->mem_reg_max    = 1ull << 48;
> +
>     device->ib_dev = ib_dev;
>     device->ib_dev_context = ibv_open_device(ib_dev);
>     device->ib_pd = NULL;
> 
> Modified: trunk/ompi/mca/btl/openib/help-mpi-btl-openib.txt
> ==============================================================================
> --- trunk/ompi/mca/btl/openib/help-mpi-btl-openib.txt Wed Jul 18 13:29:37 
> 2012        (r26803)
> +++ trunk/ompi/mca/btl/openib/help-mpi-btl-openib.txt 2012-07-18 13:29:48 EDT 
> (Wed, 18 Jul 2012)      (r26804)
> @@ -689,3 +689,22 @@
> 
> Use "ibv_devinfo -v" on the local host to see the GID table of this
> device.
> +[reg mem limit low]
> +WARNING: It appears that your OpenFabrics subsystem is configured to only
> +allow registering part of your physical memory.  This can cause MPI jobs to
> +run with erratic performance, hang, and/or crash.
> +
> +This may be caused by your OpenFabrics vendor limiting the amount of
> +physical memory that can be registered.  You should investigate the
> +relevant Linux kernel module parameters that control how much physical
> +memory can be registered, and increase them to allow registering all
> +physical memory on your machine.
> +
> +See this Open MPI FAQ item for more information on these Linux kernel module
> +parameters:
> +
> +    http://www.open-mpi.org/faq/?category=openfabrics#ib-locked-pages
> +
> +  Local host:              %s
> +  Registerable memory:     %lu MiB
> +  Total memory:            %lu MiB
> _______________________________________________
> svn mailing list
> s...@open-mpi.org
> http://www.open-mpi.org/mailman/listinfo.cgi/svn


Reply via email to