opensm/osm_mesh.c: Improve VL utilization Add steps to the algorithm to further condition the netlist of switches that is handed to lash to analyze.
(1) The previous version sorted out all the links out of switches into dimension order’in the order the dimensions were found which is driven by the arbitrary choice of links out of the seed’switch. This takes the additional step of waiting until after the lengths along each dimension are measured then reordering the links out of each switch so that dimensions with the largest lengths are ordered first. i.e. if the mesh analysis discovers that the mesh is a 5x8x6 torus then the coordinates are reordered so that based on the order of links on each switch the mesh is an 8x6x5 torus. (2) The previous version left the order of switches in the switch list the same as when they were discovered by the SMPs which is some sort of depth first tree walk modified by the random return of MADs if multiples are outstanding. This takes the additional step of reordering the switches so that they are presented in odometer’order again with the dimension with the largest lengths changing fastest. With this algorithm, we always see what we believe is the optimal result for each size of toroidal mesh. As a footnote, when we later studied the effect of broken or imperfect meshes, we found that the results are dependent on the choice of origin for the mesh which clearly doesn't matter for a perfect torus with nodes sorted as described here. Thus there is some additional work required to try to understand how to select the optimal origin for meshes with defects. We saw that the number of VLs required was higher when the origin was near a defect so choosing one as far as possible from the defects seems to be the right idea. This still needs to be done since it will remove some fairly rare defect cases which now require 9 VLs to route. Signed-off-by: Robert Pearson <[email protected]> Signed-off-by: Hal Rosenstock <[email protected]> --- Changes from v1: Move dim_order array into mesh struct Remove unneeded dim_reverse array Pointed out by Sasha diff --git a/opensm/opensm/osm_mesh.c b/opensm/opensm/osm_mesh.c index 1867876..23fad87 100644 --- a/opensm/opensm/osm_mesh.c +++ b/opensm/opensm/osm_mesh.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008,2009 System Fabric Works, Inc. + * Copyright (c) 2008,2009 System Fabric Works, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -182,6 +182,7 @@ typedef struct _mesh { int *class_count; /* population of each class */ int dimension; /* mesh dimension */ int *size; /* an array to hold size of mesh */ + int dim_order[MAX_DIMENSION]; } mesh_t; /* @@ -1027,11 +1028,11 @@ static inline int ltmag(int a, int b) } /* - * reorder_links + * reorder_node_links * * reorder the links out of a switch in sign/dimension order */ -static int reorder_links(lash_t *p_lash, int sw) +static int reorder_node_links(lash_t *p_lash, mesh_t *mesh, int sw) { osm_log_t *p_log = &p_lash->p_osm->log; switch_t *s = p_lash->switches[sw]; @@ -1039,9 +1040,10 @@ static int reorder_links(lash_t *p_lash, int sw) int n = node->num_links; link_t **links; int *axes; - int i, j; + int i, j, k, l; int c; int next = 0; + int dimension = mesh->dimension; if (!(links = calloc(n, sizeof(link_t *)))) { OSM_LOG(p_log, OSM_LOG_ERROR, "Failed allocating temp array - out of memory\n"); @@ -1057,19 +1059,23 @@ static int reorder_links(lash_t *p_lash, int sw) /* * find the links with axes */ - for (j = 1; j <= 2*node->dimension; j++) { - c = j; - if (node->coord[(c-1)/2] > 0) - c = opposite(s, c); + for (i = 0; i < dimension; i++) { + j = mesh->dim_order[i]; + for (k = 1; k <= 2; k++) { + c = 2*j + k; - for (i = 0; i < n; i++) { - if (!node->links[i]) - continue; - if (node->axes[i] == c) { - links[next] = node->links[i]; - axes[next] = node->axes[i]; - node->links[i] = NULL; - next++; + if (node->coord[j] > 0) + c = opposite(s, c); + + for (l = 0; l < n; l++) { + if (!node->links[l]) + continue; + if (node->axes[l] == c) { + links[next] = node->links[l]; + axes[next] = node->axes[l]; + node->links[l] = NULL; + next++; + } } } } @@ -1099,9 +1105,9 @@ static int reorder_links(lash_t *p_lash, int sw) } /* - * measure geometry + * make_coord */ -static int measure_geometry(lash_t *p_lash, mesh_t *mesh, int seed) +static int make_coord(lash_t *p_lash, mesh_t *mesh, int seed) { osm_log_t *p_log = &p_lash->p_osm->log; int i, j, k; @@ -1111,8 +1117,6 @@ static int measure_geometry(lash_t *p_lash, mesh_t *mesh, int seed) int dimension = mesh->dimension; int num_switches = p_lash->num_switches; int assigned_axes = 0, unassigned_axes = 0; - int max[MAX_DIMENSION]; - int min[MAX_DIMENSION]; OSM_LOG_ENTER(p_log); @@ -1120,6 +1124,13 @@ static int measure_geometry(lash_t *p_lash, mesh_t *mesh, int seed) s = p_lash->switches[sw]; s->node->coord = calloc(dimension, sizeof(int)); + if (!s->node->coord) { + OSM_LOG(p_log, OSM_LOG_ERROR, + "Failed allocating coord - out of memory\n"); + OSM_LOG_EXIT(p_log); + return -1; + } + for (i = 0; i < dimension; i++) s->node->coord[i] = (sw == seed) ? 0 : LARGE; @@ -1163,14 +1174,36 @@ static int measure_geometry(lash_t *p_lash, mesh_t *mesh, int seed) } } while (change); - for (sw = 0; sw < num_switches; sw++) { - if (reorder_links(p_lash, sw)) { - OSM_LOG_EXIT(p_log); - return -1; - } - } + OSM_LOG_EXIT(p_log); + return 0; +} + +/* + * measure geometry + */ +static int measure_geometry(lash_t *p_lash, mesh_t *mesh) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int i, j; + int sw; + switch_t *s; + int dimension = mesh->dimension; + int num_switches = p_lash->num_switches; + int max[MAX_DIMENSION]; + int min[MAX_DIMENSION]; + int size[MAX_DIMENSION]; + int max_size; + int max_index; + + OSM_LOG_ENTER(p_log); mesh->size = calloc(dimension, sizeof(int)); + if (!mesh->size) { + OSM_LOG(p_log, OSM_LOG_ERROR, + "Failed allocating size - out of memory\n"); + OSM_LOG_EXIT(p_log); + return -1; + } for (i = 0; i < dimension; i++) { max[i] = -LARGE; @@ -1191,7 +1224,48 @@ static int measure_geometry(lash_t *p_lash, mesh_t *mesh, int seed) } for (i = 0; i < dimension; i++) - mesh->size[i] = max[i] - min[i] + 1; + mesh->size[i] = size[i] = max[i] - min[i] + 1; + + /* + * find an order of dimensions that places largest + * sizes first since this seems to work best with LASH + */ + for (j = 0; j < dimension; j++) { + max_size = -1; + max_index = -1; + + for (i = 0; i < dimension; i++) { + if (size[i] > max_size) { + max_size = size[i]; + max_index = i; + } + } + + mesh->dim_order[j] = max_index; + size[max_index] = -1; + } + + OSM_LOG_EXIT(p_log); + return 0; +} + +/* + * reorder links + */ +static int reorder_links(lash_t *p_lash, mesh_t *mesh) +{ + osm_log_t *p_log = &p_lash->p_osm->log; + int sw; + int num_switches = p_lash->num_switches; + + OSM_LOG_ENTER(p_log); + + for (sw = 0; sw < num_switches; sw++) { + if (reorder_node_links(p_lash, mesh, sw)) { + OSM_LOG_EXIT(p_log); + return -1; + } + } OSM_LOG_EXIT(p_log); return 0; @@ -1387,7 +1461,13 @@ int osm_do_mesh_analysis(lash_t *p_lash) if (s->node->type) { make_geometry(p_lash, max_class_type); - if (measure_geometry(p_lash, mesh, max_class_type)) + if (make_coord(p_lash, mesh, max_class_type)) + goto err; + + if (measure_geometry(p_lash, mesh)) + goto err; + + if (reorder_links(p_lash, mesh)) goto err; p = buf; _______________________________________________ general mailing list [email protected] http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
