Before branching v1.3 and doing RC1, I'd like to discuss how I/O devices
are stored in the topology. We currently have dedicated pci device and
OS device lists together with custom helpers (get_next_pcidev/osdev).
Having helpers to traverse hostbridges or bridges may help in some
cases, but I don't think we want to add that many custom things.
So instead I wroted the attached patch which puts I/O devices in levels
so that they are almost regular objects. The only caveat is that those
objects are still special so they can't have a real depth, that's why I
am adding some special depth for them:
HWLOC_TYPE_DEPTH_BRIDGE = -3
HWLOC_TYPE_DEPTH_PCI_DEVICE = -4
HWLOC_TYPE_DEPTH_OS_DEVICE = -5
There might a way to use normal depth for I/O devices too, but it would
make the code much more ugly and I am not sure we really need this. And
we can still change this after RC1 anyway.
Pros:
* Uniform interface
+ no need for custom get_next_<type> helpers anymore (I kept them in
hwloc/helpers.h but they could go away)
+ helpers such as get_obj_by_type/depth work fine
+ documentation is more simple, we just need to say that they have
special depth
* Easy to extend to other special types without adding new helpers (just
need to add new special depth for new types
* Changes in the core are actually small
Cons:
* Several internal functions need special care for this new special depths
* The bridge levels contains objects that may be parent/child of each other
Opinions ?
Brice
diff --git a/doc/hwloc.doxy b/doc/hwloc.doxy
index e3684df..989a33c 100644
--- a/doc/hwloc.doxy
+++ b/doc/hwloc.doxy
@@ -1053,10 +1053,9 @@ as a child of the corresponding hwloc socket object.
These new objects have neither CPU sets nor node sets (NULL pointers) because
they are not directly usable by the user applications.
Moreover I/O hierarchies may be highly complex (asymmetric trees of bridges).
-So I/O objects are not placed in a level with a well-defined depth as
-regular objects do.
-Hence, they have no notion of cousins and their list may not be traversed with
-helpers such as hwloc_get_next_obj_by_depth() or hwloc_get_next_obj_by_type().
+So I/O objects are placed in specific levels with custom depth.
+Their lists may still be traversed with regular helpers such as
+hwloc_get_next_obj_by_type().
However, hwloc offers some dedicated helpers such as hwloc_get_next_pcidev()
and hwloc_get_next_osdev() for convenience (see \ref hwlocality_iodev and
\ref hwlocality_advanced_io).
diff --git a/include/hwloc.h b/include/hwloc.h
index 0da9ecb..b8c2874 100644
--- a/include/hwloc.h
+++ b/include/hwloc.h
@@ -195,20 +195,17 @@ typedef enum {
HWLOC_OBJ_BRIDGE, /**< \brief Bridge.
* Any bridge that connects the host or an I/O bus,
* to another I/O bus.
- * Bridge objects have neither CPU sets, nor node sets,
- * nor any level depth.
+ * Bridge objects have neither CPU sets nor node sets.
* They are not added to the topology unless I/O discovery
* is enabled with hwloc_topology_set_flags().
*/
HWLOC_OBJ_PCI_DEVICE, /**< \brief PCI device.
- * These objects have neither CPU sets, nor node sets,
- * nor any level depth.
+ * These objects have neither CPU sets nor node sets.
* They are not added to the topology unless I/O discovery
* is enabled with hwloc_topology_set_flags().
*/
HWLOC_OBJ_OS_DEVICE, /**< \brief Operating system device.
- * These objects have neither CPU sets, nor node sets,
- * nor any level depth.
+ * These objects have neither CPU sets nor node sets.
* They are not added to the topology unless I/O discovery
* is enabled with hwloc_topology_set_flags().
*/
@@ -960,16 +957,17 @@ HWLOC_DECLSPEC unsigned hwloc_topology_get_depth(hwloc_topology_t __hwloc_restri
* If some objects of the given type exist in different levels, for instance
* L1 and L2 caches, the function returns HWLOC_TYPE_DEPTH_MULTIPLE.
*
- * If an I/O object type is given, the function returns HWLOC_TYPE_DEPTH_UNKNOWN
- * because I/O objects are not stored in levels as other CPU-related objects do.
- * If you ever need to traverse the list of PCI or OS devices, you should use
- * hwloc_get_next_pcidev() or hwloc_get_next_osdev().
+ * If an I/O object type is given, the function returns a special value
+ * because I/O objects are stored in special levels that are not CPU-related.
*/
HWLOC_DECLSPEC int hwloc_get_type_depth (hwloc_topology_t topology, hwloc_obj_type_t type);
enum hwloc_get_type_depth_e {
HWLOC_TYPE_DEPTH_UNKNOWN = -1, /**< \brief No object of given type exists in the topology. \hideinitializer */
- HWLOC_TYPE_DEPTH_MULTIPLE = -2 /**< \brief Objects of given type exist at different depth in the topology. \hideinitializer */
+ HWLOC_TYPE_DEPTH_MULTIPLE = -2, /**< \brief Objects of given type exist at different depth in the topology. \hideinitializer */
+ HWLOC_TYPE_DEPTH_BRIDGE = -3, /**< \brief Virtual depth for bridge object level. \hideinitializer */
+ HWLOC_TYPE_DEPTH_PCI_DEVICE = -4, /**< \brief Virtual depth for PCI device object level. \hideinitializer */
+ HWLOC_TYPE_DEPTH_OS_DEVICE = -5 /**< \brief Virtual depth for software device object level. \hideinitializer */
};
/** \brief Returns the type of objects at depth \p depth.
@@ -1828,32 +1826,6 @@ HWLOC_DECLSPEC int hwloc_free(hwloc_topology_t topology, void *addr, size_t len)
-/** \defgroup hwlocality_iodev Basic I/O Device Management
- * @{
- */
-
-/** \brief Get the next PCI device in the system.
- *
- * This is useful for enumerating PCI device objects because
- * those are not available through a common level or depth.
- *
- * \return the first PCI device if \p prev is \c NULL.
- */
-HWLOC_DECLSPEC struct hwloc_obj * hwloc_get_next_pcidev(struct hwloc_topology *topology, struct hwloc_obj *prev);
-
-/** \brief Get the next OS device in the system.
- *
- * This is useful for enumerating OS device objects because
- * those are not available through a common level or depth.
- *
- * \return the first OS device if \p prev is \c NULL.
- */
-HWLOC_DECLSPEC struct hwloc_obj * hwloc_get_next_osdev(struct hwloc_topology *topology, struct hwloc_obj *prev);
-
-/** @} */
-
-
-
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/include/hwloc/helper.h b/include/hwloc/helper.h
index 1052484..07d316d 100644
--- a/include/hwloc/helper.h
+++ b/include/hwloc/helper.h
@@ -518,10 +518,12 @@ hwloc_get_shared_cache_covering_obj (hwloc_topology_t topology __hwloc_attribute
/** \brief Do a depth-first traversal of the topology to find and sort
*
- * all objects that are at the same depth than \p src.
- * Report in \p objs up to \p max physically closest ones to \p src.
+ * all objects that are at the same depth than \p src.
+ * Report in \p objs up to \p max physically closest ones to \p src.
*
- * \return the number of objects returned in \p objs.
+ * \return the number of objects returned in \p objs.
+ *
+ * \return 0 if \p src is an I/O object.
*/
/* TODO: rather provide an iterator? Provide a way to know how much should be allocated? By returning the total number of objects instead? */
HWLOC_DECLSPEC unsigned hwloc_get_closest_objs (hwloc_topology_t topology, hwloc_obj_t src, hwloc_obj_t * __hwloc_restrict objs, unsigned max);
@@ -1079,6 +1081,16 @@ hwloc_get_latency(hwloc_topology_t topology,
* @{
*/
+/** \brief Get the next PCI device in the system.
+ *
+ * \return the first PCI device if \p prev is \c NULL.
+ */
+static __inline hwloc_obj_t
+hwloc_get_next_pcidev(hwloc_topology_t topology, hwloc_obj_t prev)
+{
+ return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_PCI_DEVICE, prev);
+}
+
/** \brief Find the PCI device object matching the PCI bus id
* given domain, bus device and function PCI bus id.
*/
@@ -1115,6 +1127,16 @@ hwloc_get_pcidev_by_busidstring(hwloc_topology_t topology, const char *busid)
return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, func);
}
+/** \brief Get the next OS device in the system.
+ *
+ * \return the first OS device if \p prev is \c NULL.
+ */
+static __inline hwloc_obj_t
+hwloc_get_next_osdev(hwloc_topology_t topology, hwloc_obj_t prev)
+{
+ return hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_OS_DEVICE, prev);
+}
+
/** @} */
diff --git a/include/hwloc/rename.h b/include/hwloc/rename.h
index e6e0067..2f7ca82 100644
--- a/include/hwloc/rename.h
+++ b/include/hwloc/rename.h
@@ -215,9 +215,9 @@ extern "C" {
#define hwloc_free HWLOC_NAME(free)
#define hwloc_get_next_pcidev HWLOC_NAME(get_next_pcidev)
-#define hwloc_get_next_osdev HWLOC_NAME(get_next_osdev)
#define hwloc_get_pcidev_by_busid HWLOC_NAME(get_pcidev_by_busid)
#define hwloc_get_pcidev_by_busidstring HWLOC_NAME(get_pcidev_by_busidstring)
+#define hwloc_get_next_osdev HWLOC_NAME(get_next_osdev)
/* hwloc/bitmap.h */
diff --git a/include/private/private.h b/include/private/private.h
index 8187b52..70c9c0a 100644
--- a/include/private/private.h
+++ b/include/private/private.h
@@ -67,7 +67,15 @@ struct hwloc_topology {
int is_loaded;
hwloc_pid_t pid; /* Process ID the topology is view from, 0 for self */
- struct hwloc_obj *first_pcidev, *last_pcidev, *first_osdev, *last_osdev;
+ unsigned bridge_nbobjects;
+ struct hwloc_obj **bridge_level;
+ struct hwloc_obj *first_bridge, *last_bridge;
+ unsigned pcidev_nbobjects;
+ struct hwloc_obj **pcidev_level;
+ struct hwloc_obj *first_pcidev, *last_pcidev;
+ unsigned osdev_nbobjects;
+ struct hwloc_obj **osdev_level;
+ struct hwloc_obj *first_osdev, *last_osdev;
int (*set_thisproc_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags);
int (*get_thisproc_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags);
diff --git a/src/topology.c b/src/topology.c
index 27c28c7..445bd41 100644
--- a/src/topology.c
+++ b/src/topology.c
@@ -810,7 +810,16 @@ append_iodevs(hwloc_topology_t topology, hwloc_obj_t obj)
{
hwloc_obj_t child, *temp;
- if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
+ if (obj->type == HWLOC_OBJ_BRIDGE) {
+ /* Insert in the main bridge list */
+ if (topology->first_bridge) {
+ obj->prev_cousin = topology->last_bridge;
+ obj->prev_cousin->next_cousin = obj;
+ topology->last_bridge = obj;
+ } else {
+ topology->first_bridge = topology->last_bridge = obj;
+ }
+ } else if (obj->type == HWLOC_OBJ_PCI_DEVICE) {
/* Insert in the main pcidev list */
if (topology->first_pcidev) {
obj->prev_cousin = topology->last_pcidev;
@@ -1519,6 +1528,33 @@ hwloc_level_take_objects(hwloc_topology_t topology,
return new_i;
}
+static unsigned
+hwloc_build_level_from_list(struct hwloc_obj *first, struct hwloc_obj ***levelp)
+{
+ unsigned i, nb;
+ struct hwloc_obj * obj;
+
+ /* count */
+ obj = first;
+ i = 0;
+ while (obj) {
+ i++;
+ obj = obj->next_cousin;
+ }
+ nb = i;
+
+ /* allocate and fill level */
+ *levelp = malloc(nb * sizeof(struct hwloc_obj *));
+ obj = first;
+ i = 0;
+ while (obj) {
+ (*levelp)[i++] = obj;
+ obj = obj->next_cousin;
+ }
+
+ return nb;
+}
+
/*
* Do the remaining work that hwloc_connect_children() did not do earlier.
*/
@@ -1540,11 +1576,25 @@ hwloc_connect_levels(hwloc_topology_t topology)
/* initialize all depth to unknown */
for (l = HWLOC_OBJ_SYSTEM; l < HWLOC_OBJ_TYPE_MAX; l++)
topology->type_depth[l] = HWLOC_TYPE_DEPTH_UNKNOWN;
+ /* initialize root type depth */
topology->type_depth[topology->levels[0][0]->type] = 0;
- /* initialize special I/O device levels */
+ /* initialize I/O special levels */
+ free(topology->bridge_level);
+ topology->bridge_level = NULL;
+ topology->bridge_nbobjects = 0;
+ topology->first_bridge = topology->last_bridge = NULL;
+ topology->type_depth[HWLOC_OBJ_BRIDGE] = HWLOC_TYPE_DEPTH_BRIDGE;
+ free(topology->pcidev_level);
+ topology->pcidev_level = NULL;
+ topology->pcidev_nbobjects = 0;
topology->first_pcidev = topology->last_pcidev = NULL;
+ topology->type_depth[HWLOC_OBJ_PCI_DEVICE] = HWLOC_TYPE_DEPTH_PCI_DEVICE;
+ free(topology->osdev_level);
+ topology->osdev_level = NULL;
+ topology->osdev_nbobjects = 0;
topology->first_osdev = topology->last_osdev = NULL;
+ topology->type_depth[HWLOC_OBJ_OS_DEVICE] = HWLOC_TYPE_DEPTH_OS_DEVICE;
/* Start with children of the whole system. */
l = 0;
@@ -1652,6 +1702,10 @@ hwloc_connect_levels(hwloc_topology_t topology)
/* It's empty now. */
free(objs);
+ topology->bridge_nbobjects = hwloc_build_level_from_list(topology->first_bridge, &topology->bridge_level);
+ topology->pcidev_nbobjects = hwloc_build_level_from_list(topology->first_pcidev, &topology->pcidev_level);
+ topology->osdev_nbobjects = hwloc_build_level_from_list(topology->first_osdev, &topology->osdev_level);
+
return 0;
}
@@ -2165,6 +2219,12 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology)
topology->level_nbobjects[0] = 1;
/* NULLify other levels so that we can detect and free old ones in hwloc_connect_levels() if needed */
memset(topology->levels+1, 0, (HWLOC_DEPTH_MAX-1)*sizeof(*topology->levels));
+ topology->bridge_level = NULL;
+ topology->pcidev_level = NULL;
+ topology->osdev_level = NULL;
+ topology->first_bridge = topology->last_bridge = NULL;
+ topology->first_pcidev = topology->last_pcidev = NULL;
+ topology->first_osdev = topology->last_osdev = NULL;
/* Create the actual machine object, but don't touch its attributes yet
* since the OS backend may still change the object into something else
@@ -2175,9 +2235,6 @@ hwloc_topology_setup_defaults(struct hwloc_topology *topology)
root_obj->logical_index = 0;
root_obj->sibling_rank = 0;
topology->levels[0][0] = root_obj;
-
- topology->first_pcidev = topology->last_pcidev = NULL;
- topology->first_osdev = topology->last_osdev = NULL;
}
int
@@ -2391,6 +2448,9 @@ hwloc_topology_clear (struct hwloc_topology *topology)
hwloc_topology_clear_tree (topology, topology->levels[0][0]);
for (l=0; l<topology->nb_levels; l++)
free(topology->levels[l]);
+ free(topology->bridge_level);
+ free(topology->pcidev_level);
+ free(topology->osdev_level);
}
void
diff --git a/src/traversal.c b/src/traversal.c
index 166bbe0..b0793cf 100644
--- a/src/traversal.c
+++ b/src/traversal.c
@@ -22,7 +22,16 @@ hwloc_obj_type_t
hwloc_get_depth_type (hwloc_topology_t topology, unsigned depth)
{
if (depth >= topology->nb_levels)
- return (hwloc_obj_type_t) -1;
+ switch (depth) {
+ case HWLOC_TYPE_DEPTH_BRIDGE:
+ return HWLOC_OBJ_BRIDGE;
+ case HWLOC_TYPE_DEPTH_PCI_DEVICE:
+ return HWLOC_OBJ_PCI_DEVICE;
+ case HWLOC_TYPE_DEPTH_OS_DEVICE:
+ return HWLOC_OBJ_OS_DEVICE;
+ default:
+ return (hwloc_obj_type_t) -1;
+ }
return topology->levels[depth][0]->type;
}
@@ -30,7 +39,16 @@ unsigned
hwloc_get_nbobjs_by_depth (struct hwloc_topology *topology, unsigned depth)
{
if (depth >= topology->nb_levels)
- return 0;
+ switch (depth) {
+ case HWLOC_TYPE_DEPTH_BRIDGE:
+ return topology->bridge_nbobjects;
+ case HWLOC_TYPE_DEPTH_PCI_DEVICE:
+ return topology->pcidev_nbobjects;
+ case HWLOC_TYPE_DEPTH_OS_DEVICE:
+ return topology->osdev_nbobjects;
+ default:
+ return 0;
+ }
return topology->level_nbobjects[depth];
}
@@ -38,36 +56,21 @@ struct hwloc_obj *
hwloc_get_obj_by_depth (struct hwloc_topology *topology, unsigned depth, unsigned idx)
{
if (depth >= topology->nb_levels)
- return NULL;
+ switch (depth) {
+ case HWLOC_TYPE_DEPTH_BRIDGE:
+ return idx < topology->bridge_nbobjects ? topology->bridge_level[idx] : NULL;
+ case HWLOC_TYPE_DEPTH_PCI_DEVICE:
+ return idx < topology->pcidev_nbobjects ? topology->pcidev_level[idx] : NULL;
+ case HWLOC_TYPE_DEPTH_OS_DEVICE:
+ return idx < topology->osdev_nbobjects ? topology->osdev_level[idx] : NULL;
+ default:
+ return NULL;
+ }
if (idx >= topology->level_nbobjects[depth])
return NULL;
return topology->levels[depth][idx];
}
-struct hwloc_obj *
-hwloc_get_next_pcidev(struct hwloc_topology *topology, struct hwloc_obj *prev)
-{
- if (prev) {
- if (prev->type != HWLOC_OBJ_PCI_DEVICE)
- return NULL;
- return prev->next_cousin;
- } else {
- return topology->first_pcidev;
- }
-}
-
-struct hwloc_obj *
-hwloc_get_next_osdev(struct hwloc_topology *topology, struct hwloc_obj *prev)
-{
- if (prev) {
- if (prev->type != HWLOC_OBJ_OS_DEVICE)
- return NULL;
- return prev->next_cousin;
- } else {
- return topology->first_osdev;
- }
-}
-
unsigned hwloc_get_closest_objs (struct hwloc_topology *topology, struct hwloc_obj *src, struct hwloc_obj **objs, unsigned max)
{
struct hwloc_obj *parent, *nextparent, **src_objs;
@@ -77,9 +80,6 @@ unsigned hwloc_get_closest_objs (struct hwloc_topology *topology, struct hwloc_o
if (!src->cpuset)
return 0;
- src_nbobjects = topology->level_nbobjects[src->depth];
- src_objs = topology->levels[src->depth];
-
parent = src;
while (stored < max) {
while (1) {
diff --git a/tests/hwloc_iodevs.c b/tests/hwloc_iodevs.c
index 6560b1c..65e3285 100644
--- a/tests/hwloc_iodevs.c
+++ b/tests/hwloc_iodevs.c
@@ -37,9 +37,9 @@ int main(void)
printf("Found OS device %s subtype %d\n", obj->name, obj->attr->osdev.type);
}
- assert(HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(topology, HWLOC_OBJ_BRIDGE));
- assert(HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(topology, HWLOC_OBJ_PCI_DEVICE));
- assert(HWLOC_TYPE_DEPTH_UNKNOWN == hwloc_get_type_depth(topology, HWLOC_OBJ_OS_DEVICE));
+ assert(HWLOC_TYPE_DEPTH_BRIDGE == hwloc_get_type_depth(topology, HWLOC_OBJ_BRIDGE));
+ assert(HWLOC_TYPE_DEPTH_PCI_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_PCI_DEVICE));
+ assert(HWLOC_TYPE_DEPTH_OS_DEVICE == hwloc_get_type_depth(topology, HWLOC_OBJ_OS_DEVICE));
assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_PCI_DEVICE) < 0);
assert(hwloc_compare_types(HWLOC_OBJ_BRIDGE, HWLOC_OBJ_OS_DEVICE) < 0);
assert(hwloc_compare_types(HWLOC_OBJ_PCI_DEVICE, HWLOC_OBJ_OS_DEVICE) < 0);