Hello all,

I am trying to run on the new Skylake processors on Stampede2, and while the run speeds we are obtaining are very good, we are concerned that we aren't optimizing properly when it comes to OpenMP.  For instance, we see the best speeds when we use 8 MPI processes per node (with 6 threads each, for a total of 48 threads/node).  Based on the architecture, we were expecting to see the best speeds with 2 MPI processes per node.  Here is what I have tried:

1. Using the simfactory files for stampede2-skx (config file, run and
   submit scripts, and modules loaded) I compiled a version of
   ET_2017_06 using LazEv (RIT's evolution thorn) and McLachlan and
   submitted a series of runs that change both the number of nodes
   used, and how I distribute the 48 threads/node between MPI processes.
2. I use a standard low resolution grid, with no IO or regridding. 
   Parameter file attached.
3. Run speeds are measured from Carpet::physical_time_per_hour at
   iteration 256.
4. I tried both with and without hwloc/SystemTopology.
5. For both McLachlan and LazEv, I see similar results, with 2 MPI/node
   giving the worst results (see attached plot for McLachlan) and a
   slight preference for 8 MPI/node.

So my questions are:

1. Have any tests been run by other users on stampede2 skx?
2. Should we expect 2 MPI/node to be the optimal choice?
3. If so, are there any other configurations we can try that could help
   optimize?

Thanks in advance!

Jim Healy

# rl    dx              Resolution      Res:m-          Res:m+          rm              rp              NNm     NNp
# 0     2.38095238      M/0.4200        m-/0.1930       m+/0.2270       400.000000      400.000000      168     168
# 1     1.19047619      M/0.8400        m-/0.3859       m+/0.4541       225.000000      225.000000      189     189
# 2     0.59523810      M/1.6800        m-/0.7719       m+/0.9081       150.000000      150.000000      252     252
# 3     0.29761905      M/3.3600        m-/1.5438       m+/1.8162       35.000000       35.000000       117     117
# 4     0.14880952      M/6.7200        m-/3.0876       m+/3.6324       10.000000       10.000000       67      67
# 5     0.07440476      M/13.4400       m-/6.1751       m+/7.2649       4.800000        4.800000        64      64
# 6     0.03720238      M/26.8800       m-/12.3503      m+/14.5297      2.400000        2.400000        64      64
# 7     0.01860119      M/53.7600       m-/24.7005      m+/29.0595      1.200000        1.200000        64      64
# 8     0.00930060      M/107.5200      m-/49.4011      m+/58.1189      0.700000        0.700000        75      75
# 9     0.00465030      M/215.0400      m-/98.8022      m+/116.2378     0.500000        0.250000        107     53
# 10    0.00232515      M/430.0800      m-/197.6043     m+/232.4757     0.250000        0.125000        107     53
# dt = 0.000775049603174603
# m+ = 0.54054054054054
# m- = 0.459459459459459

# Base thorn list for this run. Further thorns (ML_BSSN, ML_BSSN_Helper,
# NewRad, Dissipation, ML_ADMConstraints, TmunuBase) are activated by the
# additional ActiveThorns assignments later in this file.
ActiveThorns = "admbase admcoupling admmacros coordgauge spacemask 
StaticConformal  boundary time cartgrid3d ioutil CoordBase aeilocalinterp Slab 
SphericalSurface LocalReduce MoL Carpet CarpetInterp CarpetIOASCII CarpetLib 
CarpetReduce CarpetSlab CarpetRegrid2 CarpetIOHDF5 CarpetIOScalar TwoPunctures 
InitBase SymBase LoopControl GSL ReflectionSymmetry CarpetIOBasic"

#############################################################
# Grid
#############################################################
# The grid is described through CoordBase (type = "coordbase"):
# x and y span [-400, 400], z spans [0, 400].

CartGrid3D::type                        = "coordbase"
CartGrid3D::domain                      = "full"
CartGrid3D::avoid_origin                = "no"

# Coarse-grid spacing; equals the rl 0 dx (2.38095238) in the header table.
CoordBase::domainsize                   = minmax
CoordBase::xmin                         = -400
CoordBase::ymin                         = -400
CoordBase::zmin                         = 0
CoordBase::xmax                         = 400
CoordBase::ymax                         = 400
CoordBase::zmax                         = 400
CoordBase::dx                           = 2.38095238095238
CoordBase::dy                           = 2.38095238095238
CoordBase::dz                           = 2.38095238095238
# Boundary width is 4 points on every face (same value as driver::ghost_size).
# Only the lower z face uses boundary_shiftout = 1; that is the face at
# zmin = 0, where ReflectionSymmetry::reflection_z is enabled further down.
CoordBase::boundary_size_x_lower        = 4
CoordBase::boundary_size_y_lower        = 4
CoordBase::boundary_size_z_lower        = 4
CoordBase::boundary_shiftout_x_lower    = 0
CoordBase::boundary_shiftout_y_lower    = 0
CoordBase::boundary_shiftout_z_lower    = 1
CoordBase::boundary_size_x_upper        = 4
CoordBase::boundary_size_y_upper        = 4
CoordBase::boundary_size_z_upper        = 4
CoordBase::boundary_shiftout_x_upper    = 0
CoordBase::boundary_shiftout_y_upper    = 0
CoordBase::boundary_shiftout_z_upper    = 0

#############################################################
# Symmetries
#############################################################
# Reflection is enabled across z only, matching CoordBase::zmin = 0 above;
# x and y cover the full [-400, 400] range and are not reflected.

ReflectionSymmetry::reflection_x        = "no"
ReflectionSymmetry::reflection_y        = "no"
ReflectionSymmetry::reflection_z        = "yes"
ReflectionSymmetry::avoid_origin_x      = "no"
ReflectionSymmetry::avoid_origin_y      = "no"
ReflectionSymmetry::avoid_origin_z      = "no"

# RotatingSymmetry180 is not in any ActiveThorns list in this file, so its
# parameter stays commented out.
#RotatingSymmetry180::poison_boundaries  = "yes"

#############################################################
# Run statistics
#############################################################
# TimerReport is not in any ActiveThorns list in this file; both settings
# below are disabled.

#TimerReport::out_every       = 1024
#TimerReport::out_filename    = "TimerReport"

#############################################################
# CarpetRegrid2
#############################################################
# Three refinement centres: centres 1 and 2 carry 11 levels each, centre 3
# (at the origin) carries 3 levels. The radii reproduce the rm/rp columns of
# the header table at the top of this parameter file.

# regrid_every (2048) is larger than Cactus::cctk_itlast (256) set further
# down. NOTE(review): presumably this is what keeps regridding out of the
# benchmark, as the accompanying message states -- confirm.
CarpetRegrid2::regrid_every = 2048 #2048
Carpet::grid_coordinates_filename = "grid.asc" 
CarpetRegrid2::symmetry_rotating180   = "no"

CarpetRegrid2::num_centres = 3

# Centre 1 (x > 0). Levels 3-8 use the shared table radii; levels 9 and 10
# use the rp column (0.25, 0.125).
# Levels 1 and 2 are set to radius 0 here; the 225 and 150 radii listed for
# those levels in the table are supplied by centre 3.
# NOTE(review): presumably radius 0 means this centre adds no box on that
# level -- confirm against the CarpetRegrid2 documentation.
CarpetRegrid2::num_levels_1 = 11
CarpetRegrid2::position_x_1 = 5.97297297297297
CarpetRegrid2::position_y_1 = 0
CarpetRegrid2::position_z_1 = 0
CarpetRegrid2::radius_1[ 1] = 0
CarpetRegrid2::radius_1[ 2] = 0
CarpetRegrid2::radius_1[ 3] = 35
CarpetRegrid2::radius_1[ 4] = 10
CarpetRegrid2::radius_1[ 5] = 4.8
CarpetRegrid2::radius_1[ 6] = 2.4
CarpetRegrid2::radius_1[ 7] = 1.2
CarpetRegrid2::radius_1[ 8] = 0.7
CarpetRegrid2::radius_1[ 9] = 0.25
CarpetRegrid2::radius_1[10] = 0.125

# Centre 2 (x < 0). Same layout as centre 1, except levels 9 and 10 use the
# rm column (0.5, 0.25).
CarpetRegrid2::num_levels_2 = 11
CarpetRegrid2::position_x_2 = -7.02702702702703
CarpetRegrid2::position_y_2 = 0
CarpetRegrid2::position_z_2 = 0
CarpetRegrid2::radius_2[ 1] = 0
CarpetRegrid2::radius_2[ 2] = 0
CarpetRegrid2::radius_2[ 3] = 35
CarpetRegrid2::radius_2[ 4] = 10
CarpetRegrid2::radius_2[ 5] = 4.8
CarpetRegrid2::radius_2[ 6] = 2.4
CarpetRegrid2::radius_2[ 7] = 1.2
CarpetRegrid2::radius_2[ 8] = 0.7
CarpetRegrid2::radius_2[ 9] = 0.5
CarpetRegrid2::radius_2[10] = 0.25

# Centre 3 at the origin: radii 225 and 150 for levels 1 and 2 (table rows
# rl 1 and rl 2).
CarpetRegrid2::num_levels_3  =  3
CarpetRegrid2::position_x_3  =  0
CarpetRegrid2::position_y_3  =  0
CarpetRegrid2::position_z_3  =  0
CarpetRegrid2::radius_3[1]  =  225
CarpetRegrid2::radius_3[2]  =  150

# LazRegrid2 is not in any ActiveThorns list in this file; left disabled.
#LazRegrid2::num_levels_on_recover[0]=10
#LazRegrid2::num_levels_on_recover[1]=10

#############################################################
# SphericalSurface
#############################################################
# Three surfaces (indices 0-2), all configured identically: ntheta = 39 and
# nphi = 76 (equal to maxntheta / maxnphi) with 2 ghost points in each
# angular direction.
# NOTE(review): no other thorn in this file's ActiveThorns lists obviously
# consumes these surfaces -- confirm whether they are needed for the benchmark.

SphericalSurface::nsurfaces = 3
SphericalSurface::maxntheta = 39
SphericalSurface::maxnphi   = 76

SphericalSurface::ntheta      [0] = 39
SphericalSurface::nphi        [0] = 76
SphericalSurface::nghoststheta[0] = 2
SphericalSurface::nghostsphi  [0] = 2

SphericalSurface::ntheta      [1] = 39
SphericalSurface::nphi        [1] = 76
SphericalSurface::nghoststheta[1] = 2
SphericalSurface::nghostsphi  [1] = 2

SphericalSurface::ntheta      [2] = 39
SphericalSurface::nphi        [2] = 76
SphericalSurface::nghoststheta[2] = 2
SphericalSurface::nghostsphi  [2] = 2

#############################################################
# Carpet
#############################################################
# Driver settings. ghost_size = 4 equals the CoordBase boundary sizes above,
# and max_refinement_levels = 11 equals CarpetRegrid2::num_levels_1/2.

driver::ghost_size                      = 4
Carpet::domain_from_coordbase           = "yes"
Carpet::prolongation_order_space        = 5
Carpet::prolongation_order_time         = 2
Carpet::max_refinement_levels           = 11
Carpet::use_buffer_zones                = "yes"
#Carpet::num_integrator_substeps         = 1
#Carpet::additional_buffer_zones         = 2
Carpet::verbose                         = "no"
Carpet::veryverbose                     = "no"
Carpet::schedule_barriers               = "no"

# Time-level initialisation: only init_3_timelevels is switched on.
Carpet::init_3_timelevels               = "yes"
Carpet::init_each_timelevel             = "no"
Carpet::init_fill_timelevels            = "no"
Carpet::enable_all_storage              = "no"
Carpet::regrid_during_recovery          = "no"

# Refinement factor 2 between levels: each dx in the header table is half
# that of the level above. The poison/timer options below are disabled.
Carpet::refinement_factor              = 2
#Carpet::time_refinement_factors        = "[1,1,1,2,4,8,16,32,64,128]"
#Carpet::poison_new_timelevels          = "yes"
#Carpet::check_for_poison               = "no"
#Carpet::poison_value                   = 113
#Carpet::use_tapered_grids              = "no"
#Carpet::output_timers_every             = 1024
#Carpet::print_timestats_every           = 0


#############################################################
# CarpetLib
#############################################################
# Bounding-box output and checks are off; communication interleaving and
# send combining are on.
# print_memstats_every (1024) is larger than Cactus::cctk_itlast (256).

CarpetLib::output_bboxes  = no
CarpetLib::check_bboxes              = no
CarpetLib::interleave_communications = yes
CarpetLib::combine_sends             = yes
CarpetLib::print_memstats_every      = 1024
#CarpetLib::max_memory_size_MB        = 3100
#CarpetLib::poison_new_memory            = "yes"
#CarpetLib::poison_value                 = 114

#############################################################
# Time integration
#############################################################
# Termination limits: max_runtime, cctk_final_time and cctk_itlast are all
# set. NOTE(review): with terminate = "any" the run presumably stops at
# whichever limit is reached first -- confirm against the Cactus docs.
# cctk_itlast = 256 is the iteration at which the accompanying message reads
# Carpet::physical_time_per_hour.

Cactus::terminate                     = "any"
Cactus::max_runtime                   = 2850
Cactus::cctk_final_time               = 4999
Cactus::cctk_itlast                   = 256
Cactus::cctk_timer_output             = "full"
Cactus::highlight_warning_messages    = "no"

# dtfac = 1/3; consistent with the header's dt = 0.000775049603174603,
# which is one third of the finest dx (0.00232515).
Time::dtfac                           = 0.333333333333333

# RK4 time stepping through MoL with 4 intermediate steps and 1 scratch level.
MethodOfLines::ode_method             = "RK4"
MethodOfLines::MoL_NaN_Check          = "no"
MethodOfLines::MoL_Intermediate_Steps = 4
MethodOfLines::MoL_Num_Scratch_Levels = 1

#############################################################
# Initial data
#############################################################
# Two-puncture initial data from the TwoPunctures thorn, set up on all
# levels (init_all_levels).

initbase::initial_data_setup_method = init_all_levels

ADMBase::initial_data = "twopunctures"
ADMBase::metric_type  = "Physical"
ADMBase::initial_lapse   = "twopunctures-averaged"
ADMBase::initial_shift   = "zero"
ADMBase::initial_dtlapse = "zero"
ADMBase::initial_dtshift = "zero"

# Uncomment these for fast but very inaccurate initial data
#       TwoPunctures::npoints_A = 6
#       TwoPunctures::npoints_B = 6
#       TwoPunctures::npoints_phi = 6

TwoPunctures::verbose           = "yes"
TwoPunctures::keep_u_around     = no

# All npoints_* settings are commented out, so this file sets no explicit
# spectral resolution for TwoPunctures.
###TwoPunctures::npoints_A         = 70
##TwoPunctures::npoints_B         = 70
#TwoPunctures::npoints_phi       = 70

# par_b +/- center_offset[0]: 6.5 - 0.527027... = 5.97297... and
# -6.5 - 0.527027... = -7.02702..., i.e. CarpetRegrid2::position_x_1 and
# position_x_2. NOTE(review): presumably these are the puncture x positions
# -- confirm against the TwoPunctures documentation.
TwoPunctures::par_b             = 6.5
TwoPunctures::center_offset[0]  = -0.527027027027027

# "plus" puncture: momentum in the x-y plane, no spin.
TwoPunctures::par_m_plus        = 0.529929328970489
TwoPunctures::par_P_plus[0]     = -0.000385046023339
TwoPunctures::par_P_plus[1]     = 0.079132079120352
TwoPunctures::par_P_plus[2]     = 0
TwoPunctures::par_S_plus[0]     = 0
TwoPunctures::par_S_plus[1]     = 0
TwoPunctures::par_S_plus[2]     = 0

# "minus" puncture: opposite momentum to the plus puncture, spin along z only.
TwoPunctures::par_m_minus       = 0.371848649946643
TwoPunctures::par_P_minus[0]    = 0.000385046023339
TwoPunctures::par_P_minus[1]    = -0.079132079120352
TwoPunctures::par_P_minus[2]    = 0
TwoPunctures::par_S_minus[0]    = 0
TwoPunctures::par_S_minus[1]    = 0
TwoPunctures::par_S_minus[2]    = 0.126661796932067

# Solver controls: at most 10 Newton iterations, tolerance 7.0e-10.
TwoPunctures::Newton_maxit = 10
TwoPunctures::Newton_tol = 7.0e-10
TwoPunctures::grid_setup_method = "evaluation"
TwoPunctures::TP_Tiny = 1e-6

#############################################################
# Evolution system
#############################################################
# McLachlan BSSN evolution (ML_BSSN) with NewRad boundary treatment;
# metric, lapse and shift (and their time derivatives) are all handed to
# ML_BSSN.

ActiveThorns = "ML_BSSN ML_BSSN_Helper NewRad"

ADMBase::evolution_method         = "ML_BSSN"
ADMBase::lapse_evolution_method   = "ML_BSSN"
ADMBase::shift_evolution_method   = "ML_BSSN"
ADMBase::dtlapse_evolution_method = "ML_BSSN"
ADMBase::dtshift_evolution_method = "ML_BSSN"

# Finite-difference order and gauge parameters.
ML_BSSN::fdOrder             = 6
ML_BSSN::harmonicN           = 1      # 1+log
ML_BSSN::harmonicF           = 2.0    # 1+log
ML_BSSN::ShiftGammaCoeff     = 0.75
ML_BSSN::BetaDriver          = 1.0
ML_BSSN::advectLapse         = 1
ML_BSSN::advectShift         = 1

ML_BSSN::MinimumLapse        = 1.0e-8

ML_BSSN::initial_boundary_condition = "extrapolate-gammas"
ML_BSSN::rhs_boundary_condition     = "NewRad"
Boundary::radpower                     = 2

# Every per-group boundary selection is "none"; the RHS boundary condition
# is set to "NewRad" above.
ML_BSSN::ML_log_confac_bound = "none"
ML_BSSN::ML_metric_bound     = "none"
ML_BSSN::ML_Gamma_bound      = "none"
ML_BSSN::ML_trace_curv_bound = "none"
ML_BSSN::ML_curv_bound       = "none"
ML_BSSN::ML_lapse_bound      = "none"
ML_BSSN::ML_dtlapse_bound    = "none"
ML_BSSN::ML_shift_bound      = "none"
ML_BSSN::ML_dtshift_bound    = "none"



# Dissipation is applied to all nine ML_BSSN variable groups listed below.
# NOTE(review): order = 5 is used together with ML_BSSN::fdOrder = 6 --
# confirm this pairing is intended.
ActiveThorns = "Dissipation"

Dissipation::order = 5
Dissipation::vars  = "
        ML_BSSN::ML_log_confac
        ML_BSSN::ML_metric
        ML_BSSN::ML_trace_curv
        ML_BSSN::ML_curv
        ML_BSSN::ML_Gamma
        ML_BSSN::ML_lapse
        ML_BSSN::ML_shift
        ML_BSSN::ML_dtlapse
        ML_BSSN::ML_dtshift
"



# ML_ADMConstraints provides ML_Ham, which is printed via
# CarpetIOBasic::outInfo_vars in the Output section.
ActiveThorns = "ML_ADMConstraints TmunuBase"


#############################################################
# Output
#############################################################
# Only screen info output is active (every 128 iterations); all scalar,
# 1D ASCII and 2D HDF5 output below has *_every = 0 and empty variable lists.

# NOTE(review): $parfile presumably expands to a directory named after the
# parameter file -- confirm.
IO::out_dir                          = $parfile
IO::out_fileinfo                     = "all"

# Screen output: Hamiltonian constraint plus the run-speed measure
# (Carpet::physical_time_per_hour) quoted in the accompanying message.
CarpetIOBasic::outInfo_every         = 128
CarpetIOBasic::outInfo_vars          = "ML_ADMConstraints::ML_Ham 
Carpet::physical_time_per_hour"
CarpetIOBasic::real_max              = 1e6
CarpetIOBasic::int_width             = 12

CarpetIOScalar::outScalar_every      = 0
CarpetIOScalar::outScalar_reductions = "norm2 minimum"
CarpetIOScalar::outScalar_vars       = ""

CarpetIOASCII::out1D_every           = 0
CarpetIOASCII::out1D_x               = "yes"
CarpetIOASCII::out1D_y               = "no"
CarpetIOASCII::out1D_z               = "no"
CarpetIOASCII::out1D_d               = "no"
CarpetIOASCII::out1D_vars            = ""

# Disabled 2D/3D ASCII output options.
#CarpetIOASCII::out2D_every           = 0
#CarpetIOASCII::out2D_vars            = ""
#Carpetioascii::out3D_ghosts          = "yes"
#CarpetIOASCII::out2D_xz              = "no"
#CarpetIOASCII::out2D_yz              = "no"
#CarpetIOASCII::out_precision         = 19

#CarpetIOHDF5::out_every              = 0
#CarpetIOHDF5::out_vars               = ""

CarpetIOHDF5::out2D_every            = 0
CarpetIOHDF5::out2D_vars             = ""

#############################################################
# Checkpoint and recovery
#############################################################
# Checkpointing is switched off (checkpoint = "no", checkpoint_on_terminate
# = "no"); the walltime interval, keep count and directory are still set.

CarpetIOHDF5::checkpoint       = "no"
IO::checkpoint_every_walltime_hours = 6
IO::checkpoint_keep            = 2
IO::checkpoint_dir             = "test_checks"
IO::checkpoint_on_terminate    = "no"

# Recovery is set to "autoprobe" and reads from the same directory as
# IO::checkpoint_dir ("test_checks").
IO::recover                    = "autoprobe"
IO::recover_dir                = "test_checks"
IO::recover_and_remove         = "no"

_______________________________________________
Users mailing list
[email protected]
http://lists.einsteintoolkit.org/mailman/listinfo/users

Reply via email to