Hello Terry,

Here's a patch that should help. It cleans up the code and makes all arrays
dynamic. I artificially set the initial array sizes to 4 to exercise
the code on our 24-way T1 machine. I will set them to 256 or so in the
final commit. Please let me know if it helps on your 1440-way machine.

Brice

diff --git a/include/hwloc/rename.h b/include/hwloc/rename.h
index a938811..129926c 100644
--- a/include/hwloc/rename.h
+++ b/include/hwloc/rename.h
@@ -489,7 +489,6 @@ extern "C" {
 #define hwloc_bitmap_printf_value HWLOC_NAME(bitmap_printf_value)
 #define hwloc_alloc_setup_object HWLOC_NAME(alloc_setup_object)
 #define hwloc_free_unlinked_object HWLOC_NAME(free_unlinked_object)
-#define hwloc_setup_level HWLOC_NAME(setup_level)

 #define hwloc_alloc_heap HWLOC_NAME(alloc_heap)
 #define hwloc_alloc_mmap HWLOC_NAME(alloc_mmap)
diff --git a/include/private/private.h b/include/private/private.h
index 648058e..6767960 100644
--- a/include/private/private.h
+++ b/include/private/private.h
@@ -302,39 +302,6 @@ hwloc_alloc_setup_object(hwloc_obj_type_t type, signed idx)
 }

 extern void hwloc_free_unlinked_object(hwloc_obj_t obj);
-
-#define hwloc_object_cpuset_from_array(l, _value, _array, _max) do {	\
-		struct hwloc_obj *__l = (l);				\
-		unsigned int *__a = (_array);				\
-		int k;							\
-		__l->cpuset = hwloc_bitmap_alloc();			\
-		for(k=0; k<_max; k++)					\
-			if (__a[k] == _value)				\
-				hwloc_bitmap_set(__l->cpuset, k);	\
-	} while (0)
-
-/* Configures an array of NUM objects of type TYPE with physical IDs OSPHYSIDS
- * and for which processors have ID PROC_PHYSIDS, and add them to the topology.
- * */
-static __hwloc_inline void
-hwloc_setup_level(int procid_max, unsigned num, unsigned *osphysids, unsigned *proc_physids, struct hwloc_topology *topology, hwloc_obj_type_t type)
-{
-  struct hwloc_obj *obj;
-  unsigned j;
-
-  hwloc_debug("%d %s\n", num, hwloc_obj_type_string(type));
-
-  for (j = 0; j < num; j++)
-    {
-      obj = hwloc_alloc_setup_object(type, osphysids[j]);
-      hwloc_object_cpuset_from_array(obj, j, proc_physids, procid_max);
-      hwloc_debug_2args_bitmap("%s %d has cpuset %s\n",
-		 hwloc_obj_type_string(type),
-		 j, obj->cpuset);
-      hwloc_insert_object_by_cpuset(topology, obj);
-    }
-  hwloc_debug("%s", "\n");
-}
 #endif

 /* This can be used for the alloc field to get allocated data that can be freed by free() */
diff --git a/src/topology-solaris.c b/src/topology-solaris.c
index 9758955..c49bbf5 100644
--- a/src/topology-solaris.c
+++ b/src/topology-solaris.c
@@ -438,7 +438,6 @@ hwloc_look_lgrp(struct hwloc_topology *topology)

 #ifdef HAVE_LIBKSTAT
 #include <kstat.h>
-#define HWLOC_NBMAXCPUS 1024 /* FIXME: drop */
 static int
 hwloc_look_kstat(struct hwloc_topology *topology)
 {
@@ -451,38 +450,48 @@ hwloc_look_kstat(struct hwloc_topology *topology)
   kstat_named_t *stat;
   unsigned look_cores = 1, look_chips = 1;

-  unsigned numsockets = 0;
-  unsigned proc_physids[HWLOC_NBMAXCPUS];
-  unsigned proc_osphysids[HWLOC_NBMAXCPUS];
-  unsigned osphysids[HWLOC_NBMAXCPUS];
-
-  unsigned numcores = 0;
-  unsigned proc_coreids[HWLOC_NBMAXCPUS];
-  unsigned oscoreids[HWLOC_NBMAXCPUS];
-
-  unsigned core_osphysids[HWLOC_NBMAXCPUS];
-
-  unsigned numprocs = 0;
-  unsigned proc_procids[HWLOC_NBMAXCPUS];
-  unsigned osprocids[HWLOC_NBMAXCPUS];
-
-  unsigned physid, coreid, cpuid;
-  unsigned procid_max = 0;
+  unsigned Pproc_max = 0;
+  unsigned Pproc_alloc = 4;
+  struct hwloc_solaris_Pproc {
+    unsigned Lsock, Psock, Lcore, Lproc;
+  } * Pproc = malloc(Pproc_alloc * sizeof(*Pproc));
+
+  unsigned Lproc_num = 0;
+  unsigned Lproc_alloc = 4;
+  struct hwloc_solaris_Lproc {
+    unsigned Pproc;
+  } * Lproc = malloc(Lproc_alloc * sizeof(*Lproc));
+
+  unsigned Lcore_num = 0;
+  unsigned Lcore_alloc = 4;
+  struct hwloc_solaris_Lcore {
+    unsigned Pcore, Psock;
+  } * Lcore = malloc(Lcore_alloc * sizeof(*Lcore));
+
+  unsigned Lsock_num = 0;
+  unsigned Lsock_alloc = 4;
+  struct hwloc_solaris_Lsock {
+    unsigned Psock;
+  } * Lsock = malloc(Lsock_alloc * sizeof(*Lsock));
+
+  unsigned sockid, coreid, cpuid;
   unsigned i;

-  for (cpuid = 0; cpuid < HWLOC_NBMAXCPUS; cpuid++)
-    {
-      proc_procids[cpuid] = -1;
-      proc_physids[cpuid] = -1;
-      proc_osphysids[cpuid] = -1;
-      proc_coreids[cpuid] = -1;
-    }
+  for (i = 0; i < Pproc_alloc; i++) {
+    Pproc[i].Lproc = -1;
+    Pproc[i].Lsock = -1;
+    Pproc[i].Psock = -1;
+    Pproc[i].Lcore = -1;
+  }

-  if (!kc)
-    {
-      hwloc_debug("kstat_open failed: %s\n", strerror(errno));
-      return 0;
-    }
+  if (!kc) {
+    hwloc_debug("kstat_open failed: %s\n", strerror(errno));
+    free(Pproc);
+    free(Lproc);
+    free(Lcore);
+    free(Lsock);
+    return 0;
+  }

   for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next)
     {
@@ -490,11 +499,6 @@ hwloc_look_kstat(struct hwloc_topology *topology)
 	continue;

       cpuid = ksp->ks_instance;
-      if (cpuid > HWLOC_NBMAXCPUS)
-	{
-	  fprintf(stderr,"CPU id too big: %u\n", cpuid);
-	  continue;
-	}

       if (kstat_read(kc, ksp, NULL) == -1)
 	{
@@ -503,12 +507,28 @@ hwloc_look_kstat(struct hwloc_topology *topology)
 	}

       hwloc_debug("cpu%u\n", cpuid);
-      proc_procids[cpuid] = numprocs;
-      osprocids[numprocs] = cpuid;
-      numprocs++;

-      if (cpuid >= procid_max)
-        procid_max = cpuid + 1;
+      if (cpuid >= Pproc_alloc) {
+	Pproc_alloc *= 2;
+	Pproc = realloc(Pproc, Pproc_alloc * sizeof(*Pproc));
+	for(i = Pproc_alloc/2; i < Pproc_alloc; i++) {
+	  Pproc[i].Lproc = -1;
+	  Pproc[i].Lsock = -1;
+	  Pproc[i].Psock = -1;
+	  Pproc[i].Lcore = -1;
+	}
+      }
+      Pproc[cpuid].Lproc = Lproc_num;
+
+      if (Lproc_num >= Lproc_alloc) {
+	Lproc_alloc *= 2;
+	Lproc = realloc(Lproc, Lproc_alloc * sizeof(*Lproc));
+      }
+      Lproc[Lproc_num].Pproc = cpuid;
+      Lproc_num++;
+
+      if (cpuid >= Pproc_max)
+        Pproc_max = cpuid + 1;

       stat = (kstat_named_t *) kstat_data_lookup(ksp, "state");
       if (!stat)
@@ -528,7 +548,7 @@ hwloc_look_kstat(struct hwloc_topology *topology)
 	stat = (kstat_named_t *) kstat_data_lookup(ksp, "chip_id");
 	if (!stat)
 	  {
-	    if (numsockets)
+	    if (Lsock_num)
 	      fprintf(stderr, "could not read socket id for CPU%u: %s\n", cpuid, strerror(errno));
 	    else
 	      hwloc_debug("could not read socket id for CPU%u: %s\n", cpuid, strerror(errno));
@@ -537,17 +557,17 @@ hwloc_look_kstat(struct hwloc_topology *topology)
 	  }
 	switch (stat->data_type) {
 	  case KSTAT_DATA_INT32:
-	    physid = stat->value.i32;
+	    sockid = stat->value.i32;
 	    break;
 	  case KSTAT_DATA_UINT32:
-	    physid = stat->value.ui32;
+	    sockid = stat->value.ui32;
 	    break;
 #ifdef _INT64_TYPE
 	  case KSTAT_DATA_UINT64:
-	    physid = stat->value.ui64;
+	    sockid = stat->value.ui64;
 	    break;
 	  case KSTAT_DATA_INT64:
-	    physid = stat->value.i64;
+	    sockid = stat->value.i64;
 	    break;
 #endif
 	  default:
@@ -555,14 +575,19 @@ hwloc_look_kstat(struct hwloc_topology *topology)
 	    look_chips = 0;
 	    continue;
 	}
-	proc_osphysids[cpuid] = physid;
-	for (i = 0; i < numsockets; i++)
-	  if (physid == osphysids[i])
+	Pproc[cpuid].Psock = sockid;
+	for (i = 0; i < Lsock_num; i++)
+	  if (sockid == Lsock[i].Psock)
 	    break;
-	proc_physids[cpuid] = i;
-	hwloc_debug("%u on socket %u (%u)\n", cpuid, i, physid);
-	if (i == numsockets)
-	  osphysids[numsockets++] = physid;
+	Pproc[cpuid].Lsock = i;
+	hwloc_debug("%u on socket %u (%u)\n", cpuid, i, sockid);
+	if (i == Lsock_num) {
+	  if (Lsock_num == Lsock_alloc) {
+	    Lsock_alloc *= 2;
+	    Lsock = realloc(Lsock, Lsock_alloc * sizeof(*Lsock));
+	  }
+	  Lsock[Lsock_num++].Psock = sockid;
+	}
       } while(0);

       if (look_cores) do {
@@ -570,7 +595,7 @@ hwloc_look_kstat(struct hwloc_topology *topology)
 	stat = (kstat_named_t *) kstat_data_lookup(ksp, "core_id");
 	if (!stat)
 	  {
-	    if (numcores)
+	    if (Lcore_num)
 	      fprintf(stderr, "could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
 	    else
 	      hwloc_debug("could not read core id for CPU%u: %s\n", cpuid, strerror(errno));
@@ -597,16 +622,19 @@ hwloc_look_kstat(struct hwloc_topology *topology)
 	    look_cores = 0;
 	    continue;
 	}
-	for (i = 0; i < numcores; i++)
-	  if (coreid == oscoreids[i] && proc_osphysids[cpuid] == core_osphysids[i])
+	for (i = 0; i < Lcore_num; i++)
+	  if (coreid == Lcore[i].Pcore && Pproc[cpuid].Psock == Lcore[i].Psock)
 	    break;
-	proc_coreids[cpuid] = i;
+	Pproc[cpuid].Lcore = i;
 	hwloc_debug("%u on core %u (%u)\n", cpuid, i, coreid);
-	if (i == numcores)
-	  {
-	    core_osphysids[numcores] = proc_osphysids[cpuid];
-	    oscoreids[numcores++] = coreid;
+	if (i == Lcore_num) {
+	  if (Lcore_num == Lcore_alloc) {
+	    Lcore_alloc *= 2;
+	    Lcore = realloc(Lcore, Lcore_alloc * sizeof(*Lcore));
 	  }
+	  Lcore[Lcore_num].Psock = Pproc[cpuid].Psock;
+	  Lcore[Lcore_num++].Pcore = coreid;
+	}
       } while(0);

       /* Note: there is also clog_id for the Thread ID (not unique) and
@@ -616,30 +644,63 @@ hwloc_look_kstat(struct hwloc_topology *topology)

   if (look_chips) {
     struct hwloc_obj *obj;
-    unsigned j;
-    hwloc_debug("%d Sockets\n", numsockets);
-    for (j = 0; j < numsockets; j++) {
-      obj = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, osphysids[j]);
+    unsigned j,k;
+    hwloc_debug("%d Sockets\n", Lsock_num);
+    for (j = 0; j < Lsock_num; j++) {
+      obj = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, Lsock[j].Psock);
       if (CPUType)
 	hwloc_obj_add_info(obj, "CPUType", CPUType);
       if (CPUModel)
 	hwloc_obj_add_info(obj, "CPUModel", CPUModel);
-      hwloc_object_cpuset_from_array(obj, j, proc_physids, procid_max);
+      obj->cpuset = hwloc_bitmap_alloc();
+      for(k=0; k<Pproc_max; k++)
+	if (Pproc[k].Lsock == j)
+	  hwloc_bitmap_set(obj->cpuset, k);
       hwloc_debug_1arg_bitmap("Socket %d has cpuset %s\n", j, obj->cpuset);
       hwloc_insert_object_by_cpuset(topology, obj);
     }
     hwloc_debug("%s", "\n");
   }

-  if (look_cores)
-    hwloc_setup_level(procid_max, numcores, oscoreids, proc_coreids, topology, HWLOC_OBJ_CORE);
-
-  if (numprocs)
-    hwloc_setup_level(procid_max, numprocs, osprocids, proc_procids, topology, HWLOC_OBJ_PU);
+  if (look_cores) {
+    struct hwloc_obj *obj;
+    unsigned j,k;
+    hwloc_debug("%d Cores\n", Lcore_num);
+    for (j = 0; j < Lcore_num; j++) {
+      obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, Lcore[j].Pcore);
+      obj->cpuset = hwloc_bitmap_alloc();
+      for(k=0; k<Pproc_max; k++)
+	if (Pproc[k].Lcore == j)
+	  hwloc_bitmap_set(obj->cpuset, k);
+      hwloc_debug_1arg_bitmap("Core %d has cpuset %s\n", j, obj->cpuset);
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    hwloc_debug("%s", "\n");
+  }
+  if (Lproc_num) {
+    struct hwloc_obj *obj;
+    unsigned j,k;
+    hwloc_debug("%d PUs\n", Lproc_num);
+    for (j = 0; j < Lproc_num; j++) {
+      obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, Lproc[j].Pproc);
+      obj->cpuset = hwloc_bitmap_alloc();
+      for(k=0; k<Pproc_max; k++)
+	if (Pproc[k].Lproc == j)
+	  hwloc_bitmap_set(obj->cpuset, k);
+      hwloc_debug_1arg_bitmap("PU %d has cpuset %s\n", j, obj->cpuset);
+      hwloc_insert_object_by_cpuset(topology, obj);
+    }
+    hwloc_debug("%s", "\n");
+  }

   kstat_close(kc);

-  return numprocs > 0;
+  free(Pproc);
+  free(Lproc);
+  free(Lcore);
+  free(Lsock);
+
+  return Lproc_num > 0;
 }
 #endif /* LIBKSTAT */

Reply via email to