Le 08/02/2012 22:33, Paul H. Hargrove a écrit : > Tests on the virtual node I have access to where that problem report > originated is still not quite right. > There is now a different assertion failing than I had seen before: >> lt-linux-libnuma: >> /users/phh1/OMPI/hwloc-1.3.2rc1-linux-ppc64-gcc//hwloc-1.3.2rc1/tests/linux-libnuma.c:83: >> main: Assertion `!memcmp(&nodemask, &numa_all_nodes, >> sizeof(nodemask_t))' failed. >> /bin/sh: line 5: 19416 Aborted ${dir}$tst >> FAIL: linux-libnuma > > I don't have any clue if that represents forward or backward progress.
Can you try the attached patch? It removes nodemask checks (this deprecated interface is too buggy/strange in libnuma, no way to assert its behavior reliably). Then, it fixes the libnuma helpers to properly use os_index instead logical_index (important on your machine because node ids are sparse). And finally it makes sure the test actually checks what we want (shouldn't matter in your case). I've tested this on your topology, a 8-node machine with out-of-order numa node ids, and some basic nodes, with a recent and a less recent libnuma release. My current plan is to apply all these in all branches and then remove the nodemask conversion helpers from trunk. Brice
diff --git a/include/hwloc/linux-libnuma.h b/include/hwloc/linux-libnuma.h index a5a1517..2a0d5ef 100644 --- a/include/hwloc/linux-libnuma.h +++ b/include/hwloc/linux-libnuma.h @@ -142,15 +142,12 @@ hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { - hwloc_obj_t node; - unsigned i; + hwloc_obj_t node = NULL; hwloc_bitmap_zero(cpuset); - for(i=0; i<maxnode; i++) - if (mask[i/sizeof(*mask)/8] & (1UL << (i% (sizeof(*mask)*8)))) { - node = hwloc_get_obj_by_depth(topology, depth, i); - if (node) - hwloc_bitmap_or(cpuset, cpuset, node->cpuset); - } + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (node->os_index < maxnode + && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8))))) + hwloc_bitmap_or(cpuset, cpuset, node->cpuset); } else { /* if no numa, libnuma assumes we have a single node */ if (mask[0] & 1) @@ -178,15 +175,12 @@ hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { - hwloc_obj_t node; - unsigned i; + hwloc_obj_t node = NULL; hwloc_bitmap_zero(nodeset); - for(i=0; i<maxnode; i++) - if (mask[i/sizeof(*mask)/8] & (1UL << (i% (sizeof(*mask)*8)))) { - node = hwloc_get_obj_by_depth(topology, depth, i); - if (node) - hwloc_bitmap_set(nodeset, node->os_index); - } + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (node->os_index < maxnode + && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8))))) + hwloc_bitmap_set(nodeset, node->os_index); } else { /* if no numa, libnuma assumes we have a single node */ if (mask[0] & 1) @@ -281,15 +275,11 @@ hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_ int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { - hwloc_obj_t node; - int i; + hwloc_obj_t node = NULL; hwloc_bitmap_zero(cpuset); - for(i=0; i<NUMA_NUM_NODES; i++) - if (numa_bitmask_isbitset(bitmask, i)) { - node = hwloc_get_obj_by_depth(topology, depth, i); - if (node) - hwloc_bitmap_or(cpuset, cpuset, node->cpuset); - } + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (numa_bitmask_isbitset(bitmask, node->os_index)) + hwloc_bitmap_or(cpuset, cpuset, node->cpuset); } else { /* if no numa, libnuma assumes we have a single node */ if (numa_bitmask_isbitset(bitmask, 0)) @@ -313,15 +303,11 @@ hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodese int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { - hwloc_obj_t node; - int i; + hwloc_obj_t node = NULL; hwloc_bitmap_zero(nodeset); - for(i=0; i<NUMA_NUM_NODES; i++) - if (numa_bitmask_isbitset(bitmask, i)) { - node = hwloc_get_obj_by_depth(topology, depth, i); - if (node) - hwloc_bitmap_set(nodeset, node->os_index); - } + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (numa_bitmask_isbitset(bitmask, node->os_index)) + hwloc_bitmap_set(nodeset, node->os_index); } else { /* if no numa, libnuma assumes we have a single node */ if (numa_bitmask_isbitset(bitmask, 0)) @@ -408,15 +394,11 @@ hwloc_cpuset_from_linux_libnuma_nodemask(hwloc_topology_t topology, hwloc_cpuset int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { - hwloc_obj_t node; - int i; + hwloc_obj_t node = NULL; hwloc_bitmap_zero(cpuset); - for(i=0; i<NUMA_NUM_NODES; i++) - if (nodemask_isset(nodemask, i)) { - node = hwloc_get_obj_by_depth(topology, depth, i); - if (node) - hwloc_bitmap_or(cpuset, cpuset, node->cpuset); - } + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (nodemask_isset(nodemask, node->os_index)) + hwloc_bitmap_or(cpuset, cpuset, node->cpuset); } else { /* if no numa, libnuma assumes we have a single node */ if (nodemask_isset(nodemask, 0)) @@ -440,15 +422,11 @@ hwloc_nodeset_from_linux_libnuma_nodemask(hwloc_topology_t topology, hwloc_nodes int depth = hwloc_get_type_depth(topology, HWLOC_OBJ_NODE); if (depth != HWLOC_TYPE_DEPTH_UNKNOWN) { - hwloc_obj_t node; - int i; + hwloc_obj_t node = NULL; hwloc_bitmap_zero(nodeset); - for(i=0; i<NUMA_NUM_NODES; i++) - if (nodemask_isset(nodemask, i)) { - node = hwloc_get_obj_by_depth(topology, depth, i); - if (node) - hwloc_bitmap_set(nodeset, node->os_index); - } + while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) + if (nodemask_isset(nodemask, node->os_index)) + hwloc_bitmap_set(nodeset, node->os_index); } else { /* if no numa, libnuma assumes we have a single node */ if (nodemask_isset(nodemask, 0)) diff --git a/tests/linux-libnuma.c b/tests/linux-libnuma.c index dc3b348..727e027 100644 --- a/tests/linux-libnuma.c +++ b/tests/linux-libnuma.c @@ -19,7 +19,6 @@ int main(void) hwloc_bitmap_t set, set2, nocpunomemnodeset, nocpubutmemnodeset, nomembutcpunodeset, nomembutcpucpuset; hwloc_obj_t node; struct bitmask *bitmask, *bitmask2; - nodemask_t nodemask, nodemask2; unsigned long mask; unsigned long maxnode; int i; @@ -65,23 +64,12 @@ int main(void) assert(hwloc_bitmap_isequal(set, set2)); hwloc_bitmap_free(set2); - set2 = hwloc_bitmap_alloc(); - hwloc_cpuset_from_linux_libnuma_nodemask(topology, set2, &numa_all_nodes); - assert(hwloc_bitmap_isequal(set, set2)); - hwloc_bitmap_free(set2); - bitmask = hwloc_cpuset_to_linux_libnuma_bitmask(topology, set); /* numa_all_nodes_ptr contains NODES with no CPU but with memory */ hwloc_bitmap_foreach_begin(i, nocpubutmemnodeset) { numa_bitmask_setbit(bitmask, i); } hwloc_bitmap_foreach_end(); assert(numa_bitmask_equal(bitmask, numa_all_nodes_ptr)); numa_bitmask_free(bitmask); - hwloc_cpuset_to_linux_libnuma_nodemask(topology, set, &nodemask); - /* numa_all_nodes contains NODES with no CPU (with or without memory) */ - hwloc_bitmap_foreach_begin(i, nocpubutmemnodeset) { nodemask_set(&nodemask, i); } hwloc_bitmap_foreach_end(); - hwloc_bitmap_foreach_begin(i, nocpunomemnodeset) { nodemask_set(&nodemask, i); } hwloc_bitmap_foreach_end(); - assert(!memcmp(&nodemask, &numa_all_nodes, sizeof(nodemask_t))); - hwloc_bitmap_free(set); /* convert full stuff between nodeset and libnuma */ @@ -101,27 +89,13 @@ int main(void) assert(hwloc_bitmap_isequal(set, set2)); hwloc_bitmap_free(set2); - set2 = hwloc_bitmap_alloc(); - hwloc_nodeset_from_linux_libnuma_nodemask(topology, set2, &numa_all_nodes); - assert(hwloc_bitmap_isequal(set, set2)); - hwloc_bitmap_free(set2); - bitmask = hwloc_nodeset_to_linux_libnuma_bitmask(topology, set); assert(numa_bitmask_equal(bitmask, numa_all_nodes_ptr)); numa_bitmask_free(bitmask); - hwloc_nodeset_to_linux_libnuma_nodemask(topology, set, &nodemask); - assert(!memcmp(&nodemask, &numa_all_nodes, sizeof(nodemask_t))); - hwloc_bitmap_free(set); /* convert empty stuff between cpuset and libnuma */ - nodemask_zero(&nodemask); - set = hwloc_bitmap_alloc(); - hwloc_cpuset_from_linux_libnuma_nodemask(topology, set, &nodemask); - assert(hwloc_bitmap_iszero(set)); - hwloc_bitmap_free(set); - bitmask = numa_bitmask_alloc(1); set = hwloc_bitmap_alloc(); hwloc_cpuset_from_linux_libnuma_bitmask(topology, set, bitmask); @@ -144,12 +118,6 @@ int main(void) hwloc_bitmap_free(set); set = hwloc_bitmap_alloc(); - hwloc_cpuset_to_linux_libnuma_nodemask(topology, set, &nodemask); - nodemask_zero(&nodemask2); - assert(nodemask_equal(&nodemask, &nodemask2)); - hwloc_bitmap_free(set); - - set = hwloc_bitmap_alloc(); maxnode = sizeof(mask)*8; hwloc_cpuset_to_linux_libnuma_ulongs(topology, set, &mask, &maxnode); assert(!mask); @@ -157,12 +125,6 @@ int main(void) hwloc_bitmap_free(set); /* convert empty stuff between nodeset and libnuma */ - nodemask_zero(&nodemask); - set = hwloc_bitmap_alloc(); - hwloc_nodeset_from_linux_libnuma_nodemask(topology, set, &nodemask); - assert(hwloc_bitmap_iszero(set)); - hwloc_bitmap_free(set); - bitmask = numa_bitmask_alloc(1); set = hwloc_bitmap_alloc(); hwloc_nodeset_from_linux_libnuma_bitmask(topology, set, bitmask); @@ -185,12 +147,6 @@ int main(void) hwloc_bitmap_free(set); set = hwloc_bitmap_alloc(); - hwloc_nodeset_to_linux_libnuma_nodemask(topology, set, &nodemask); - nodemask_zero(&nodemask2); - assert(nodemask_equal(&nodemask, &nodemask2)); - hwloc_bitmap_free(set); - - set = hwloc_bitmap_alloc(); maxnode = sizeof(mask)*8; hwloc_nodeset_to_linux_libnuma_ulongs(topology, set, &mask, &maxnode); assert(!mask); @@ -200,20 +156,14 @@ int main(void) /* convert first node (with CPU and memory) between cpuset/nodeset and libnuma */ node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NODE, NULL); while (node && (!node->memory.local_memory || hwloc_bitmap_iszero(node->cpuset))) - /* skip nodes with no cpus or no memory because libnuma doesn't the same before for nodemask and bitmask there */ + /* skip nodes with no cpus or no memory to avoid strange libnuma behaviors */ node = node->next_sibling; if (node) { /* convert first node between cpuset and libnuma */ - hwloc_cpuset_to_linux_libnuma_nodemask(topology, node->cpuset, &nodemask); - assert(nodemask_isset(&nodemask, node->os_index)); - nodemask_clr(&nodemask, node->os_index); - nodemask_zero(&nodemask2); - assert(nodemask_equal(&nodemask, &nodemask2)); - bitmask = hwloc_cpuset_to_linux_libnuma_bitmask(topology, node->cpuset); assert(numa_bitmask_isbitset(bitmask, node->os_index)); numa_bitmask_clearbit(bitmask, node->os_index); - bitmask2 = numa_bitmask_alloc(1); + bitmask2 = numa_bitmask_alloc(node->os_index + 1); assert(numa_bitmask_equal(bitmask, bitmask2)); numa_bitmask_free(bitmask); numa_bitmask_free(bitmask2); @@ -225,19 +175,12 @@ int main(void) assert(!mask); } else { assert(maxnode = node->os_index + 1); - assert(mask == (1U<<node->os_index)); + assert(mask == (1UL << node->os_index)); } set = hwloc_bitmap_alloc(); - nodemask_zero(&nodemask); - nodemask_set(&nodemask, node->os_index); - hwloc_cpuset_from_linux_libnuma_nodemask(topology, set, &nodemask); - assert(hwloc_bitmap_isequal(set, node->cpuset)); - hwloc_bitmap_free(set); - - set = hwloc_bitmap_alloc(); - bitmask = numa_bitmask_alloc(1); - numa_bitmask_setbit(bitmask, 0); + bitmask = numa_bitmask_alloc(node->os_index + 1); + numa_bitmask_setbit(bitmask, node->os_index); hwloc_cpuset_from_linux_libnuma_bitmask(topology, set, bitmask); numa_bitmask_free(bitmask); assert(hwloc_bitmap_isequal(set, node->cpuset)); @@ -247,23 +190,17 @@ int main(void) if (node->os_index >= sizeof(mask)*8) { mask = 0; } else { - mask = 1 << node->os_index; + mask = 1UL << node->os_index; } - hwloc_cpuset_from_linux_libnuma_ulongs(topology, set, &mask, 1); + hwloc_cpuset_from_linux_libnuma_ulongs(topology, set, &mask, node->os_index + 1); assert(hwloc_bitmap_isequal(set, node->cpuset)); hwloc_bitmap_free(set); /* convert first node between nodeset and libnuma */ - hwloc_nodeset_to_linux_libnuma_nodemask(topology, node->nodeset, &nodemask); - assert(nodemask_isset(&nodemask, node->os_index)); - nodemask_clr(&nodemask, node->os_index); - nodemask_zero(&nodemask2); - assert(nodemask_equal(&nodemask, &nodemask2)); - bitmask = hwloc_nodeset_to_linux_libnuma_bitmask(topology, node->nodeset); assert(numa_bitmask_isbitset(bitmask, node->os_index)); numa_bitmask_clearbit(bitmask, node->os_index); - bitmask2 = numa_bitmask_alloc(1); + bitmask2 = numa_bitmask_alloc(node->os_index + 1); assert(numa_bitmask_equal(bitmask, bitmask2)); numa_bitmask_free(bitmask); numa_bitmask_free(bitmask2); @@ -275,19 +212,12 @@ int main(void) assert(!mask); } else { assert(maxnode = node->os_index + 1); - assert(mask == (1U<<node->os_index)); + assert(mask == (1UL << node->os_index)); } set = hwloc_bitmap_alloc(); - nodemask_zero(&nodemask); - nodemask_set(&nodemask, node->os_index); - hwloc_nodeset_from_linux_libnuma_nodemask(topology, set, &nodemask); - assert(hwloc_bitmap_isequal(set, node->nodeset)); - hwloc_bitmap_free(set); - - set = hwloc_bitmap_alloc(); - bitmask = numa_bitmask_alloc(1); - numa_bitmask_setbit(bitmask, 0); + bitmask = numa_bitmask_alloc(node->os_index + 1); + numa_bitmask_setbit(bitmask, node->os_index); hwloc_nodeset_from_linux_libnuma_bitmask(topology, set, bitmask); numa_bitmask_free(bitmask); assert(hwloc_bitmap_isequal(set, node->nodeset)); @@ -297,9 +227,9 @@ int main(void) if (node->os_index >= sizeof(mask)*8) { mask = 0; } else { - mask = 1 << node->os_index; + mask = 1UL << node->os_index; } - hwloc_nodeset_from_linux_libnuma_ulongs(topology, set, &mask, 1); + hwloc_nodeset_from_linux_libnuma_ulongs(topology, set, &mask, node->os_index + 1); assert(hwloc_bitmap_isequal(set, node->nodeset)); hwloc_bitmap_free(set); }