Le 16/09/2010 13:52, Alexey Kardashevskiy a écrit :
> On 16/09/10 20:50, Brice Goglin wrote:
>>
>> We'll likely apply it, we just need to figure out where to put it if
>> it's reusable for AIX.
>>
>>
>
> Good! And what is about messages coming twice in this maillist? Am I
> the only person who experiences that? :)
I thought it was my mailer replying to both you and the mailing list, but you were actually subscribed twice; I just fixed this.

The attached patch should do what you need and handle sparse NUMA node numbers properly. I had to rework the code a bit because my local tests with fake topologies reported NUMA nodes out of order, causing the distance matrix to be misordered. So I am now recording all node indexes in a cpuset, then I fill the index array by reading the cpuset in order, and then I read the distances from sysfs. Could you confirm that it works for you too?

Brice
diff --git a/include/private/private.h b/include/private/private.h index 4e9d200..ffdd4f8 100644 --- a/include/private/private.h +++ b/include/private/private.h @@ -112,7 +112,7 @@ struct hwloc_topology { extern void hwloc_setup_pu_level(struct hwloc_topology *topology, unsigned nb_pus); -extern void hwloc_setup_misc_level_from_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, unsigned *_distances/*[nbnobjs][nbobjs]*/); +extern void hwloc_setup_misc_level_from_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, unsigned *_distances/*[nbnobjs][nbobjs]*/, unsigned *distance_indexes /*[nbobjs]*/); extern int hwloc_get_sysctlbyname(const char *name, int64_t *n); extern int hwloc_get_sysctl(int name[], unsigned namelen, int *n); extern unsigned hwloc_fallback_nbprocessors(struct hwloc_topology *topology); diff --git a/src/topology-linux.c b/src/topology-linux.c index 60406da..dc586d2 100644 --- a/src/topology-linux.c +++ b/src/topology-linux.c @@ -1302,39 +1302,67 @@ static void look_sysfsnode(struct hwloc_topology *topology, const char *path, unsigned *found) { unsigned osnode; - unsigned nbnodes = 1; + unsigned nbnodes = 0; DIR *dir; struct dirent *dirent; hwloc_obj_t node; + hwloc_cpuset_t nodeset = hwloc_cpuset_alloc(); *found = 0; + /* Get the list of nodes first */ dir = hwloc_opendir(path, topology->backend_params.sysfs.root_fd); if (dir) { while ((dirent = readdir(dir)) != NULL) { - unsigned long numnode; if (strncmp(dirent->d_name, "node", 4)) continue; - numnode = strtoul(dirent->d_name+4, NULL, 0); - if (nbnodes < numnode+1) - nbnodes = numnode+1; + osnode = strtoul(dirent->d_name+4, NULL, 0); + hwloc_cpuset_set(nodeset, osnode); + nbnodes++; } closedir(dir); } if (nbnodes <= 1) - return; + { + hwloc_cpuset_free(nodeset); + return; + } /* For convenience, put these declarations inside a block. Saves us from a bunch of mallocs, particularly with the 2D array. 
*/ + { hwloc_obj_t nodes[nbnodes]; unsigned distances[nbnodes][nbnodes]; - for (osnode=0; osnode < nbnodes; osnode++) { + unsigned distance_indexes[nbnodes]; + unsigned index; + + /* Get node indexes now. We need them in order since Linux groups + * sparse distances but keep them in order in the sysfs distance files. + */ + index = 0; + hwloc_cpuset_foreach_begin (osnode, nodeset) { + distance_indexes[index] = osnode; + index++; + } hwloc_cpuset_foreach_end(); + hwloc_cpuset_free(nodeset); + +#ifdef HWLOC_DEBUG + hwloc_debug("%s", "numa distance indexes: "); + for (index = 0; index < nbnodes; index++) { + hwloc_debug(" %u", distance_indexes[index]); + } + hwloc_debug("%s", "\n"); +#endif + + /* Get actual distances now */ + for (index = 0; index < nbnodes; index++) { char nodepath[SYSFS_NUMA_NODE_PATH_LEN]; hwloc_cpuset_t cpuset; + unsigned int osnode = distance_indexes[index]; sprintf(nodepath, "%s/node%u/cpumap", path, osnode); cpuset = hwloc_parse_cpumap(nodepath, topology->backend_params.sysfs.root_fd); @@ -1351,13 +1379,13 @@ look_sysfsnode(struct hwloc_topology *topology, const char *path, unsigned *foun hwloc_debug_1arg_cpuset("os node %u has cpuset %s\n", osnode, node->cpuset); hwloc_insert_object_by_cpuset(topology, node); - nodes[osnode] = node; + nodes[index] = node; sprintf(nodepath, "%s/node%u/distance", path, osnode); - hwloc_parse_node_distance(nodepath, nbnodes, distances[osnode], topology->backend_params.sysfs.root_fd); + hwloc_parse_node_distance(nodepath, nbnodes, distances[index], topology->backend_params.sysfs.root_fd); } - hwloc_setup_misc_level_from_distances(topology, nbnodes, nodes, (unsigned*) distances); + hwloc_setup_misc_level_from_distances(topology, nbnodes, nodes, (unsigned *) distances, (unsigned *) distance_indexes); } *found = nbnodes; diff --git a/src/topology.c b/src/topology.c index 9cefe2d..b7f7a90 100644 --- a/src/topology.c +++ b/src/topology.c @@ -289,11 +289,13 @@ hwloc__setup_misc_level_from_distances(struct 
hwloc_topology *topology, */ void hwloc_setup_misc_level_from_distances(struct hwloc_topology *topology, - unsigned nbobjs, - struct hwloc_obj **objs, - unsigned *_distances) + unsigned nbobjs, + struct hwloc_obj **objs, + unsigned *_distances, + unsigned *_distance_indexes) { unsigned (*distances)[nbobjs][nbobjs] = (unsigned (*)[nbobjs][nbobjs])_distances; + unsigned (*distance_indexes)[nbobjs] = (unsigned (*)[nbobjs])_distance_indexes; unsigned i,j; if (getenv("HWLOC_IGNORE_DISTANCES")) @@ -303,11 +305,11 @@ hwloc_setup_misc_level_from_distances(struct hwloc_topology *topology, hwloc_debug("%s", "node distance matrix:\n"); hwloc_debug("%s", " "); for(j=0; j<nbobjs; j++) - hwloc_debug(" %3u", j); + hwloc_debug(" %3u", (*distance_indexes)[j]); hwloc_debug("%s", "\n"); for(i=0; i<nbobjs; i++) { - hwloc_debug("%3u", i); + hwloc_debug("%3u", (*distance_indexes)[i]); for(j=0; j<nbobjs; j++) hwloc_debug(" %3u", (*distances)[i][j]); hwloc_debug("%s", "\n");