Le 16/09/2010 13:52, Alexey Kardashevskiy a écrit :
> On 16/09/10 20:50, Brice Goglin wrote:
>>
>> We'll likely apply it, we just need to figure out where to put it if
>> it's reusable for AIX.
>>
>>    
>
> Good! And what about messages arriving twice on this mailing list? Am I
> the only person who experiences that? :)

I thought it was my mailer replying to both you and the mailing list,
but you were actually subscribed twice; I just fixed this.

The attached patch should properly handle the sparse NUMA node numbering
you need. I had to rework the code a bit because my local tests with
fake topologies reported numa nodes out of order, causing the distance
matrix to be misordered. So I am now placing all node indexes in the
cpuset, then I fill the index array by reading the cpuset in order, and
then I read distances from sysfs. Could you confirm that it works for
you too?

Brice

diff --git a/include/private/private.h b/include/private/private.h
index 4e9d200..ffdd4f8 100644
--- a/include/private/private.h
+++ b/include/private/private.h
@@ -112,7 +112,7 @@ struct hwloc_topology {


 extern void hwloc_setup_pu_level(struct hwloc_topology *topology, unsigned nb_pus);
-extern void hwloc_setup_misc_level_from_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, unsigned *_distances/*[nbnobjs][nbobjs]*/);
+extern void hwloc_setup_misc_level_from_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, unsigned *_distances/*[nbnobjs][nbobjs]*/, unsigned *distance_indexes /*[nbobjs]*/);
 extern int hwloc_get_sysctlbyname(const char *name, int64_t *n);
 extern int hwloc_get_sysctl(int name[], unsigned namelen, int *n);
 extern unsigned hwloc_fallback_nbprocessors(struct hwloc_topology *topology);
diff --git a/src/topology-linux.c b/src/topology-linux.c
index 60406da..dc586d2 100644
--- a/src/topology-linux.c
+++ b/src/topology-linux.c
@@ -1302,39 +1302,67 @@ static void
 look_sysfsnode(struct hwloc_topology *topology, const char *path, unsigned *found)
 {
   unsigned osnode;
-  unsigned nbnodes = 1;
+  unsigned nbnodes = 0;
   DIR *dir;
   struct dirent *dirent;
   hwloc_obj_t node;
+  hwloc_cpuset_t nodeset = hwloc_cpuset_alloc();

   *found = 0;

+  /* Get the list of nodes first */
   dir = hwloc_opendir(path, topology->backend_params.sysfs.root_fd);
   if (dir)
     {
       while ((dirent = readdir(dir)) != NULL)
 	{
-	  unsigned long numnode;
 	  if (strncmp(dirent->d_name, "node", 4))
 	    continue;
-	  numnode = strtoul(dirent->d_name+4, NULL, 0);
-	  if (nbnodes < numnode+1)
-	    nbnodes = numnode+1;
+	  osnode = strtoul(dirent->d_name+4, NULL, 0);
+	  hwloc_cpuset_set(nodeset, osnode);
+	  nbnodes++;
 	}
       closedir(dir);
     }

   if (nbnodes <= 1)
-    return;
+    {
+      hwloc_cpuset_free(nodeset);
+      return;
+    }

   /* For convenience, put these declarations inside a block.  Saves us
      from a bunch of mallocs, particularly with the 2D array. */
+
   {
       hwloc_obj_t nodes[nbnodes];
       unsigned distances[nbnodes][nbnodes];
-      for (osnode=0; osnode < nbnodes; osnode++) {
+      unsigned distance_indexes[nbnodes];
+      unsigned index;
+
+      /* Get node indexes now. We need them in order since Linux groups
+       * sparse distances but keeps them in order in the sysfs distance files.
+       */
+      index = 0;
+      hwloc_cpuset_foreach_begin (osnode, nodeset) {
+	distance_indexes[index] = osnode;
+	index++;
+      } hwloc_cpuset_foreach_end();
+      hwloc_cpuset_free(nodeset);
+
+#ifdef HWLOC_DEBUG
+      hwloc_debug("%s", "numa distance indexes: ");
+      for (index = 0; index < nbnodes; index++) {
+	hwloc_debug(" %u", distance_indexes[index]);
+      }
+      hwloc_debug("%s", "\n");
+#endif
+
+      /* Get actual distances now */
+      for (index = 0; index < nbnodes; index++) {
           char nodepath[SYSFS_NUMA_NODE_PATH_LEN];
           hwloc_cpuset_t cpuset;
+	  unsigned int osnode = distance_indexes[index];

           sprintf(nodepath, "%s/node%u/cpumap", path, osnode);
           cpuset = hwloc_parse_cpumap(nodepath, topology->backend_params.sysfs.root_fd);
@@ -1351,13 +1379,13 @@ look_sysfsnode(struct hwloc_topology *topology, const char *path, unsigned *foun
           hwloc_debug_1arg_cpuset("os node %u has cpuset %s\n",
                                   osnode, node->cpuset);
           hwloc_insert_object_by_cpuset(topology, node);
-          nodes[osnode] = node;
+          nodes[index] = node;

           sprintf(nodepath, "%s/node%u/distance", path, osnode);
-          hwloc_parse_node_distance(nodepath, nbnodes, distances[osnode], topology->backend_params.sysfs.root_fd);
+          hwloc_parse_node_distance(nodepath, nbnodes, distances[index], topology->backend_params.sysfs.root_fd);
       }

-      hwloc_setup_misc_level_from_distances(topology, nbnodes, nodes, (unsigned*) distances);
+      hwloc_setup_misc_level_from_distances(topology, nbnodes, nodes, (unsigned *) distances, (unsigned *) distance_indexes);
   }

   *found = nbnodes;
diff --git a/src/topology.c b/src/topology.c
index 9cefe2d..b7f7a90 100644
--- a/src/topology.c
+++ b/src/topology.c
@@ -289,11 +289,13 @@ hwloc__setup_misc_level_from_distances(struct hwloc_topology *topology,
  */
 void
 hwloc_setup_misc_level_from_distances(struct hwloc_topology *topology,
-				     unsigned nbobjs,
-				     struct hwloc_obj **objs,
-				     unsigned *_distances)
+				      unsigned nbobjs,
+				      struct hwloc_obj **objs,
+				      unsigned *_distances,
+				      unsigned *_distance_indexes)
 {
   unsigned (*distances)[nbobjs][nbobjs] = (unsigned (*)[nbobjs][nbobjs])_distances;
+  unsigned (*distance_indexes)[nbobjs] = (unsigned (*)[nbobjs])_distance_indexes;
   unsigned i,j;

   if (getenv("HWLOC_IGNORE_DISTANCES"))
@@ -303,11 +305,11 @@ hwloc_setup_misc_level_from_distances(struct hwloc_topology *topology,
   hwloc_debug("%s", "node distance matrix:\n");
   hwloc_debug("%s", "   ");
   for(j=0; j<nbobjs; j++)
-    hwloc_debug(" %3u", j);
+    hwloc_debug(" %3u", (*distance_indexes)[j]);
   hwloc_debug("%s", "\n");

   for(i=0; i<nbobjs; i++) {
-    hwloc_debug("%3u", i);
+    hwloc_debug("%3u", (*distance_indexes)[i]);
     for(j=0; j<nbobjs; j++)
       hwloc_debug(" %3u", (*distances)[i][j]);
     hwloc_debug("%s", "\n");

Reply via email to