Hello Terry, Here's a patch that should help. It cleans up the code and makes all arrays dynamic. I artificially set the initial array sizes to 4 to exercise the code on our 24-way T1 machine. I will set it to 256 or so in the final commit. Please let me know if it helps on your 1440-way machine.
Brice
diff --git a/include/hwloc/rename.h b/include/hwloc/rename.h index a938811..129926c 100644 --- a/include/hwloc/rename.h +++ b/include/hwloc/rename.h @@ -489,7 +489,6 @@ extern "C" { #define hwloc_bitmap_printf_value HWLOC_NAME(bitmap_printf_value) #define hwloc_alloc_setup_object HWLOC_NAME(alloc_setup_object) #define hwloc_free_unlinked_object HWLOC_NAME(free_unlinked_object) -#define hwloc_setup_level HWLOC_NAME(setup_level) #define hwloc_alloc_heap HWLOC_NAME(alloc_heap) #define hwloc_alloc_mmap HWLOC_NAME(alloc_mmap) diff --git a/include/private/private.h b/include/private/private.h index 648058e..6767960 100644 --- a/include/private/private.h +++ b/include/private/private.h @@ -302,39 +302,6 @@ hwloc_alloc_setup_object(hwloc_obj_type_t type, signed idx) } extern void hwloc_free_unlinked_object(hwloc_obj_t obj); - -#define hwloc_object_cpuset_from_array(l, _value, _array, _max) do { \ - struct hwloc_obj *__l = (l); \ - unsigned int *__a = (_array); \ - int k; \ - __l->cpuset = hwloc_bitmap_alloc(); \ - for(k=0; k<_max; k++) \ - if (__a[k] == _value) \ - hwloc_bitmap_set(__l->cpuset, k); \ - } while (0) - -/* Configures an array of NUM objects of type TYPE with physical IDs OSPHYSIDS - * and for which processors have ID PROC_PHYSIDS, and add them to the topology. 
- * */ -static __hwloc_inline void -hwloc_setup_level(int procid_max, unsigned num, unsigned *osphysids, unsigned *proc_physids, struct hwloc_topology *topology, hwloc_obj_type_t type) -{ - struct hwloc_obj *obj; - unsigned j; - - hwloc_debug("%d %s\n", num, hwloc_obj_type_string(type)); - - for (j = 0; j < num; j++) - { - obj = hwloc_alloc_setup_object(type, osphysids[j]); - hwloc_object_cpuset_from_array(obj, j, proc_physids, procid_max); - hwloc_debug_2args_bitmap("%s %d has cpuset %s\n", - hwloc_obj_type_string(type), - j, obj->cpuset); - hwloc_insert_object_by_cpuset(topology, obj); - } - hwloc_debug("%s", "\n"); -} #endif /* This can be used for the alloc field to get allocated data that can be freed by free() */ diff --git a/src/topology-solaris.c b/src/topology-solaris.c index 9758955..c49bbf5 100644 --- a/src/topology-solaris.c +++ b/src/topology-solaris.c @@ -438,7 +438,6 @@ hwloc_look_lgrp(struct hwloc_topology *topology) #ifdef HAVE_LIBKSTAT #include <kstat.h> -#define HWLOC_NBMAXCPUS 1024 /* FIXME: drop */ static int hwloc_look_kstat(struct hwloc_topology *topology) { @@ -451,38 +450,48 @@ hwloc_look_kstat(struct hwloc_topology *topology) kstat_named_t *stat; unsigned look_cores = 1, look_chips = 1; - unsigned numsockets = 0; - unsigned proc_physids[HWLOC_NBMAXCPUS]; - unsigned proc_osphysids[HWLOC_NBMAXCPUS]; - unsigned osphysids[HWLOC_NBMAXCPUS]; - - unsigned numcores = 0; - unsigned proc_coreids[HWLOC_NBMAXCPUS]; - unsigned oscoreids[HWLOC_NBMAXCPUS]; - - unsigned core_osphysids[HWLOC_NBMAXCPUS]; - - unsigned numprocs = 0; - unsigned proc_procids[HWLOC_NBMAXCPUS]; - unsigned osprocids[HWLOC_NBMAXCPUS]; - - unsigned physid, coreid, cpuid; - unsigned procid_max = 0; + unsigned Pproc_max = 0; + unsigned Pproc_alloc = 4; + struct hwloc_solaris_Pproc { + unsigned Lsock, Psock, Lcore, Lproc; + } * Pproc = malloc(Pproc_alloc * sizeof(*Pproc)); + + unsigned Lproc_num = 0; + unsigned Lproc_alloc = 4; + struct hwloc_solaris_Lproc { + unsigned Pproc; + } * 
Lproc = malloc(Lproc_alloc * sizeof(*Lproc)); + + unsigned Lcore_num = 0; + unsigned Lcore_alloc = 4; + struct hwloc_solaris_Lcore { + unsigned Pcore, Psock; + } * Lcore = malloc(Lcore_alloc * sizeof(*Lcore)); + + unsigned Lsock_num = 0; + unsigned Lsock_alloc = 4; + struct hwloc_solaris_Lsock { + unsigned Psock; + } * Lsock = malloc(Lsock_alloc * sizeof(*Lsock)); + + unsigned sockid, coreid, cpuid; unsigned i; - for (cpuid = 0; cpuid < HWLOC_NBMAXCPUS; cpuid++) - { - proc_procids[cpuid] = -1; - proc_physids[cpuid] = -1; - proc_osphysids[cpuid] = -1; - proc_coreids[cpuid] = -1; - } + for (i = 0; i < Pproc_alloc; i++) { + Pproc[i].Lproc = -1; + Pproc[i].Lsock = -1; + Pproc[i].Psock = -1; + Pproc[i].Lcore = -1; + } - if (!kc) - { - hwloc_debug("kstat_open failed: %s\n", strerror(errno)); - return 0; - } + if (!kc) { + hwloc_debug("kstat_open failed: %s\n", strerror(errno)); + free(Pproc); + free(Lproc); + free(Lcore); + free(Lsock); + return 0; + } for (ksp = kc->kc_chain; ksp; ksp = ksp->ks_next) { @@ -490,11 +499,6 @@ hwloc_look_kstat(struct hwloc_topology *topology) continue; cpuid = ksp->ks_instance; - if (cpuid > HWLOC_NBMAXCPUS) - { - fprintf(stderr,"CPU id too big: %u\n", cpuid); - continue; - } if (kstat_read(kc, ksp, NULL) == -1) { @@ -503,12 +507,28 @@ hwloc_look_kstat(struct hwloc_topology *topology) } hwloc_debug("cpu%u\n", cpuid); - proc_procids[cpuid] = numprocs; - osprocids[numprocs] = cpuid; - numprocs++; - if (cpuid >= procid_max) - procid_max = cpuid + 1; + if (cpuid >= Pproc_alloc) { + Pproc_alloc *= 2; + Pproc = realloc(Pproc, Pproc_alloc * sizeof(*Pproc)); + for(i = Pproc_alloc/2; i < Pproc_alloc; i++) { + Pproc[i].Lproc = -1; + Pproc[i].Lsock = -1; + Pproc[i].Psock = -1; + Pproc[i].Lcore = -1; + } + } + Pproc[cpuid].Lproc = Lproc_num; + + if (Lproc_num >= Lproc_alloc) { + Lproc_alloc *= 2; + Lproc = realloc(Lproc, Lproc_alloc * sizeof(*Lproc)); + } + Lproc[Lproc_num].Pproc = cpuid; + Lproc_num++; + + if (cpuid >= Pproc_max) + Pproc_max = cpuid + 
1; stat = (kstat_named_t *) kstat_data_lookup(ksp, "state"); if (!stat) @@ -528,7 +548,7 @@ hwloc_look_kstat(struct hwloc_topology *topology) stat = (kstat_named_t *) kstat_data_lookup(ksp, "chip_id"); if (!stat) { - if (numsockets) + if (Lsock_num) fprintf(stderr, "could not read socket id for CPU%u: %s\n", cpuid, strerror(errno)); else hwloc_debug("could not read socket id for CPU%u: %s\n", cpuid, strerror(errno)); @@ -537,17 +557,17 @@ hwloc_look_kstat(struct hwloc_topology *topology) } switch (stat->data_type) { case KSTAT_DATA_INT32: - physid = stat->value.i32; + sockid = stat->value.i32; break; case KSTAT_DATA_UINT32: - physid = stat->value.ui32; + sockid = stat->value.ui32; break; #ifdef _INT64_TYPE case KSTAT_DATA_UINT64: - physid = stat->value.ui64; + sockid = stat->value.ui64; break; case KSTAT_DATA_INT64: - physid = stat->value.i64; + sockid = stat->value.i64; break; #endif default: @@ -555,14 +575,19 @@ hwloc_look_kstat(struct hwloc_topology *topology) look_chips = 0; continue; } - proc_osphysids[cpuid] = physid; - for (i = 0; i < numsockets; i++) - if (physid == osphysids[i]) + Pproc[cpuid].Psock = sockid; + for (i = 0; i < Lsock_num; i++) + if (sockid == Lsock[i].Psock) break; - proc_physids[cpuid] = i; - hwloc_debug("%u on socket %u (%u)\n", cpuid, i, physid); - if (i == numsockets) - osphysids[numsockets++] = physid; + Pproc[cpuid].Lsock = i; + hwloc_debug("%u on socket %u (%u)\n", cpuid, i, sockid); + if (i == Lsock_num) { + if (Lsock_num == Lsock_alloc) { + Lsock_alloc *= 2; + Lsock = realloc(Lsock, Lsock_alloc * sizeof(*Lsock)); + } + Lsock[Lsock_num++].Psock = sockid; + } } while(0); if (look_cores) do { @@ -570,7 +595,7 @@ hwloc_look_kstat(struct hwloc_topology *topology) stat = (kstat_named_t *) kstat_data_lookup(ksp, "core_id"); if (!stat) { - if (numcores) + if (Lcore_num) fprintf(stderr, "could not read core id for CPU%u: %s\n", cpuid, strerror(errno)); else hwloc_debug("could not read core id for CPU%u: %s\n", cpuid, strerror(errno)); @@ 
-597,16 +622,19 @@ hwloc_look_kstat(struct hwloc_topology *topology) look_cores = 0; continue; } - for (i = 0; i < numcores; i++) - if (coreid == oscoreids[i] && proc_osphysids[cpuid] == core_osphysids[i]) + for (i = 0; i < Lcore_num; i++) + if (coreid == Lcore[i].Pcore && Pproc[cpuid].Psock == Lcore[i].Psock) break; - proc_coreids[cpuid] = i; + Pproc[cpuid].Lcore = i; hwloc_debug("%u on core %u (%u)\n", cpuid, i, coreid); - if (i == numcores) - { - core_osphysids[numcores] = proc_osphysids[cpuid]; - oscoreids[numcores++] = coreid; + if (i == Lcore_num) { + if (Lcore_num == Lcore_alloc) { + Lcore_alloc *= 2; + Lcore = realloc(Lcore, Lcore_alloc * sizeof(*Lcore)); } + Lcore[Lcore_num].Psock = Pproc[cpuid].Psock; + Lcore[Lcore_num++].Pcore = coreid; + } } while(0); /* Note: there is also clog_id for the Thread ID (not unique) and @@ -616,30 +644,63 @@ hwloc_look_kstat(struct hwloc_topology *topology) if (look_chips) { struct hwloc_obj *obj; - unsigned j; - hwloc_debug("%d Sockets\n", numsockets); - for (j = 0; j < numsockets; j++) { - obj = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, osphysids[j]); + unsigned j,k; + hwloc_debug("%d Sockets\n", Lsock_num); + for (j = 0; j < Lsock_num; j++) { + obj = hwloc_alloc_setup_object(HWLOC_OBJ_SOCKET, Lsock[j].Psock); if (CPUType) hwloc_obj_add_info(obj, "CPUType", CPUType); if (CPUModel) hwloc_obj_add_info(obj, "CPUModel", CPUModel); - hwloc_object_cpuset_from_array(obj, j, proc_physids, procid_max); + obj->cpuset = hwloc_bitmap_alloc(); + for(k=0; k<Pproc_max; k++) + if (Pproc[k].Lsock == j) + hwloc_bitmap_set(obj->cpuset, k); hwloc_debug_1arg_bitmap("Socket %d has cpuset %s\n", j, obj->cpuset); hwloc_insert_object_by_cpuset(topology, obj); } hwloc_debug("%s", "\n"); } - if (look_cores) - hwloc_setup_level(procid_max, numcores, oscoreids, proc_coreids, topology, HWLOC_OBJ_CORE); - - if (numprocs) - hwloc_setup_level(procid_max, numprocs, osprocids, proc_procids, topology, HWLOC_OBJ_PU); + if (look_cores) { + struct hwloc_obj 
*obj; + unsigned j,k; + hwloc_debug("%d Cores\n", Lcore_num); + for (j = 0; j < Lcore_num; j++) { + obj = hwloc_alloc_setup_object(HWLOC_OBJ_CORE, Lcore[j].Pcore); + obj->cpuset = hwloc_bitmap_alloc(); + for(k=0; k<Pproc_max; k++) + if (Pproc[k].Lcore == j) + hwloc_bitmap_set(obj->cpuset, k); + hwloc_debug_1arg_bitmap("Core %d has cpuset %s\n", j, obj->cpuset); + hwloc_insert_object_by_cpuset(topology, obj); + } + hwloc_debug("%s", "\n"); + } + if (Lproc_num) { + struct hwloc_obj *obj; + unsigned j,k; + hwloc_debug("%d PUs\n", Lproc_num); + for (j = 0; j < Lproc_num; j++) { + obj = hwloc_alloc_setup_object(HWLOC_OBJ_PU, Lproc[j].Pproc); + obj->cpuset = hwloc_bitmap_alloc(); + for(k=0; k<Pproc_max; k++) + if (Pproc[k].Lproc == j) + hwloc_bitmap_set(obj->cpuset, k); + hwloc_debug_1arg_bitmap("PU %d has cpuset %s\n", j, obj->cpuset); + hwloc_insert_object_by_cpuset(topology, obj); + } + hwloc_debug("%s", "\n"); + } kstat_close(kc); - return numprocs > 0; + free(Pproc); + free(Lproc); + free(Lcore); + free(Lsock); + + return Lproc_num > 0; } #endif /* LIBKSTAT */