Brice,
Apologies, I didn't explain it very well. I do make sure that if the tile size
(256 elements * 8 bytes = 2048 bytes) is smaller than the page size (4096
bytes), then I double the number of tiles per page; I just wanted to keep the
explanation simple.
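To make the numbers concrete, here's a quick standalone check of that
adjustment (the variable names are mine, for illustration only; only the
std::max line mirrors the constructor below):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    int main()
    {
        const std::size_t page_size  = 4096;  // bytes, as from sysconf
        const std::size_t tile_elems = 256;   // elements per tile
        // 4096 / 8 = 512 doubles fit on one page
        const std::size_t elems_per_page = page_size / sizeof(double);

        // 256 * 8 = 2048 bytes < 4096, so two tiles fit on a page and the
        // tile count per domain is raised so whole pages stay on one domain
        std::size_t tiles_per_domain = 1;
        tiles_per_domain = std::max(tiles_per_domain,
                                    elems_per_page / tile_elems);
        std::printf("tiles per domain: %zu\n", tiles_per_domain); // prints 2
        return 0;
    }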
Here are some code snippets to give you the flavour of it.
Initializing the helper struct:
    matrix_numa_binder(std::size_t Ncols, std::size_t Nrows,
                       std::size_t Ntile, std::size_t Ntiles_per_domain,
                       std::size_t Ncolprocs = 1, std::size_t Nrowprocs = 1,
                       std::string pool_name = "default")
      : cols_(Ncols), rows_(Nrows),
        tile_size_(Ntile), tiles_per_domain_(Ntiles_per_domain),
        colprocs_(Ncolprocs), rowprocs_(Nrowprocs)
    {
        using namespace hpx::compute::host;
        binding_helper::pool_name_ = pool_name;
        const int CACHE_LINE_SIZE = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
        const int PAGE_SIZE       = sysconf(_SC_PAGE_SIZE);
        const int ALIGNMENT       = std::max(PAGE_SIZE, CACHE_LINE_SIZE);
        const int ELEMS_ALIGN     = (ALIGNMENT / sizeof(T));
        rows_page_ = ELEMS_ALIGN;
        // pad each column to a whole number of aligned pages
        leading_dim_ =
            ELEMS_ALIGN * ((rows_ * sizeof(T) + ALIGNMENT - 1) / ALIGNMENT);
        // if a tile is smaller than a page, raise tiles_per_domain_ so
        // that whole pages always belong to a single domain
        tiles_per_domain_ =
            std::max(tiles_per_domain_, ELEMS_ALIGN / tile_size_);
    }
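For what it's worth, I construct it along these lines (the matrix sizes and
the double template parameter here are invented, just to show the shape of a
call):

    // hypothetical instantiation: 16384 x 16384 doubles, tiles of 256
    // elements, two tiles per NUMA domain, on the default thread pool
    matrix_numa_binder<double> binder(16384, 16384, 256, 2);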
The operator called by the allocator, which returns the index of the domain to
bind a page to:
    virtual std::size_t operator()(
        const T* const base_ptr, const T* const page_ptr,
        const std::size_t pagesize, const std::size_t domains) const
        override
    {
        // offset of this page from the start of the matrix, in elements
        std::size_t offset = (page_ptr - base_ptr);
        std::size_t col    = (offset / leading_dim_);
        std::size_t row    = (offset % leading_dim_);
        std::size_t index  = (col / (tile_size_ * tiles_per_domain_));
        if ((tile_size_ * tiles_per_domain_ * sizeof(T)) >= pagesize) {
            index += (row / (tile_size_ * tiles_per_domain_));
        }
        else {
            // the constructor guarantees a domain's tiles cover >= one page
            HPX_ASSERT(0);
        }
        return index % domains;
    }
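To show what it returns, here is a standalone hand-check of the same
arithmetic (a free-function copy, not the real class; with doubles, a leading
dimension of 2048 elements and tile_size * tiles_per_domain = 512 elements =
exactly one 4096-byte page):

    #include <cstddef>
    #include <cstdio>

    // free-function copy of the mapping above, for checking by hand
    std::size_t map_page(std::size_t offset, std::size_t leading_dim,
                         std::size_t tile, std::size_t tpd,
                         std::size_t domains)
    {
        std::size_t col   = offset / leading_dim;
        std::size_t row   = offset % leading_dim;
        std::size_t index = col / (tile * tpd);
        index += row / (tile * tpd); // the (size >= pagesize) branch
        return index % domains;
    }

    int main()
    {
        // successive pages down column 0 alternate between 2 domains:
        // offsets 0, 512, 1024, 1536 -> domains 0, 1, 0, 1
        for (std::size_t off = 0; off < 2048; off += 512)
            std::printf("offset %4zu -> domain %zu\n",
                        off, map_page(off, 2048, 256, 2, 2));
        return 0;
    }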
This function is called by each thread (one per NUMA domain); if the domain
returned by the page query matches the domain ID of the thread/task, then the
first memory location on the page is written to:
    for (size_type i = 0; i < num_pages; ++i) {   // loop over all pages
        size_type dom = this->operator()(p, page_ptr, pagesize,
            nodesets.size());
        if (dom == numa_domain) {
            // trigger a memory read and rewrite without changing contents
            volatile char* vaddr = (volatile char*) page_ptr;
            *vaddr = T(0); // *vaddr;
        }
        page_ptr += pageN;
    }
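The same first-touch idea in a self-contained form, in case it helps (plain
POSIX; the round-robin domain choice is a stand-in for the binder's
operator(), so none of this is the actual HPX code):

    #include <cstddef>
    #include <unistd.h>

    // touch the first byte of every page owned by this domain, so the
    // kernel's first-touch policy places the page on the local node
    void touch_pages(char* base, std::size_t bytes,
                     std::size_t numa_domain, std::size_t domains)
    {
        const std::size_t pagesize = sysconf(_SC_PAGE_SIZE);
        for (char* ptr = base; ptr < base + bytes; ptr += pagesize) {
            // stand-in for the binder's operator(): round-robin by page
            std::size_t dom = ((ptr - base) / pagesize) % domains;
            if (dom == numa_domain) {
                volatile char* vaddr = (volatile char*) ptr;
                *vaddr = *vaddr; // read and rewrite, contents unchanged
            }
        }
    }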
All of this has been debugged quite extensively: I can write numbers to memory
and read them back, and the patterns always match the expected domains. This
function is called after all the data is written, to attempt to verify (and
display) the patterns above:
    int topology::get_numa_domain(const void* addr) const
    {
    #if HWLOC_API_VERSION >= 0x00010b06
        hpx_hwloc_bitmap_wrapper* nodeset = topology::bitmap_storage_.get();
        if (nullptr == nodeset)
        {
            hwloc_bitmap_t nodeset_ = hwloc_bitmap_alloc();
            topology::bitmap_storage_.reset(
                new hpx_hwloc_bitmap_wrapper(nodeset_));
            nodeset = topology::bitmap_storage_.get();
        }
        //
        hwloc_nodeset_t ns =
            reinterpret_cast<hwloc_nodeset_t>(nodeset->get_bmp());
        int ret = hwloc_get_area_memlocation(topo, addr, 1, ns,
            HWLOC_MEMBIND_BYNODESET);
        if (ret < 0) {
            std::string msg(strerror(errno));
            HPX_THROW_EXCEPTION(kernel_error,
                "hpx::threads::topology::get_numa_domain",
                "hwloc_get_area_memlocation failed " + msg);
            return -1;
        }
        // this uses hwloc directly
        //int bit = hwloc_bitmap_first(ns);
        //return bit;
        // this uses an alternative method, both give the same result AFAICT
        threads::mask_type mask = bitmap_to_mask(ns, HWLOC_OBJ_NUMANODE);
        return static_cast<int>(threads::find_first(mask));
    #else
        return 0;
    #endif
    }
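For comparison, the bare hwloc sequence this wraps, as a standalone helper
(error handling trimmed; topo is assumed to be an already-loaded
hwloc_topology_t):

    #include <hwloc.h>

    // return the first NUMA node that holds the page containing addr,
    // or -1 if hwloc cannot tell us
    int first_numa_node_of(hwloc_topology_t topo, const void* addr)
    {
        hwloc_bitmap_t ns = hwloc_bitmap_alloc();
        int ret = hwloc_get_area_memlocation(topo, addr, 1, ns,
            HWLOC_MEMBIND_BYNODESET);
        int node = (ret < 0) ? -1 : hwloc_bitmap_first(ns);
        hwloc_bitmap_free(ns);
        return node;
    }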
Thanks for taking the time to look it over
JB