Hi Hal,
On 10:14 Thu 18 Jun , Hal Rosenstock wrote:
>
> rather than starting from VL 0
>
> Signed-off-by: Robert Pearson <[email protected]>
> Signed-off-by: Hal Rosenstock <[email protected]>
See the comments below.
<snip...>
> diff --git a/opensm/opensm/osm_ucast_lash.c b/opensm/opensm/osm_ucast_lash.c
> index 12b5e34..540214c 100644
> --- a/opensm/opensm/osm_ucast_lash.c
> +++ b/opensm/opensm/osm_ucast_lash.c
> @@ -478,7 +478,7 @@ static void balance_virtual_lanes(lash_t * p_lash,
> unsigned lanes_needed)
> cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
> int *num_mst_in_lane = p_lash->num_mst_in_lane;
> int ***virtual_location = p_lash->virtual_location;
> - int min_filled_lane, max_filled_lane, trials;
> + int min_filled_lane, max_filled_lane, trials, max_vl;
> int old_min_filled_lane, old_max_filled_lane, new_num_min_lane,
> new_num_max_lane;
> unsigned int i, j;
> @@ -486,9 +486,13 @@ static void balance_virtual_lanes(lash_t * p_lash,
> unsigned lanes_needed)
> int next_switch2, output_link2;
> int stop = 0, cycle_found;
> int cycle_found2;
> + osm_subn_opt_t *opt = &p_lash->p_osm->subn.opt;
In this function 'opt' is used only to get 'lash_start_vl', so wouldn't
it be better to do something like:
unsigned start_vl = &p_lash->p_osm->subn.opt.lash_start_vl;
?
This is relevant for all other similar places in the patch.
>
> - max_filled_lane = 0;
> - min_filled_lane = lanes_needed - 1;
> + max_filled_lane = opt->lash_start_vl;
> + max_vl = lanes_needed + opt->lash_start_vl;
> + if (max_vl > IB_MAX_NUM_VLS)
> + max_vl = IB_MAX_NUM_VLS;
Isn't this case where LASH requires more VLs than available in IB?
> + min_filled_lane = max_vl - 1;
>
> trials = num_mst_in_lane[max_filled_lane];
> if (lanes_needed == 1)
> @@ -590,7 +594,7 @@ static void balance_virtual_lanes(lash_t * p_lash,
> unsigned lanes_needed)
> new_num_min_lane = MAX_INT;
> new_num_max_lane = 0;
>
> - for (i = 0; i < lanes_needed; i++) {
> + for (i = opt->lash_start_vl; i < max_vl; i++) {
>
> if (num_mst_in_lane[i] < new_num_min_lane) {
> new_num_min_lane = num_mst_in_lane[i];
> @@ -673,12 +677,18 @@ static void free_lash_structures(lash_t * p_lash)
> {
> unsigned int i, j, k;
> unsigned num_switches = p_lash->num_switches;
> + int max_vl;
> osm_log_t *p_log = &p_lash->p_osm->log;
> + osm_subn_opt_t *opt = &p_lash->p_osm->subn.opt;
>
> OSM_LOG_ENTER(p_log);
>
> + max_vl = opt->lash_start_vl + p_lash->vl_min;
> + if (max_vl > IB_MAX_NUM_VLS)
> + max_vl = IB_MAX_NUM_VLS;
> +
> /* free cdg_vertex_matrix */
> - for (i = 0; i < p_lash->vl_min; i++) {
> + for (i = opt->lash_start_vl; i < max_vl; i++) {
> for (j = 0; j < num_switches; j++) {
> for (k = 0; k < num_switches; k++)
> if (p_lash->cdg_vertex_matrix[i][j][k])
> @@ -715,13 +725,19 @@ static int init_lash_structures(lash_t * p_lash)
> osm_log_t *p_log = &p_lash->p_osm->log;
> int status = 0;
> unsigned int i, j, k;
> + int max_vl;
> + osm_subn_opt_t *opt = &p_lash->p_osm->subn.opt;
>
> OSM_LOG_ENTER(p_log);
>
> + max_vl = vl_min + opt->lash_start_vl;
I'm not following...
vl_min is total number of VLs available in a fabric, lash_start_vl is
VL number where LASH may start to use VLs, so VLs available for LASH are
lash_start_vl..vl_min. Assuming so what does 'max_vl = vl_min +
lash_start_vl' represent? It is always overflow in terms of available
VLs, no?
> + if (max_vl > IB_MAX_NUM_VLS)
> + max_vl = IB_MAX_NUM_VLS;
> +
> /* initialise
> cdg_vertex_matrix[num_switches][num_switches][num_switches] */
> p_lash->cdg_vertex_matrix =
> - (cdg_vertex_t ****) malloc(vl_min * sizeof(cdg_vertex_t ****));
> - for (i = 0; i < vl_min; i++) {
> + (cdg_vertex_t ****) calloc(max_vl, sizeof(cdg_vertex_t ****));
> + for (i = opt->lash_start_vl; i < max_vl; i++) {
> p_lash->cdg_vertex_matrix[i] =
> (cdg_vertex_t ***) malloc(num_switches *
> sizeof(cdg_vertex_t ***));
> @@ -730,7 +746,7 @@ static int init_lash_structures(lash_t * p_lash)
> goto Exit_Mem_Error;
> }
>
> - for (i = 0; i < vl_min; i++) {
> + for (i = opt->lash_start_vl; i < max_vl; i++) {
> for (j = 0; j < num_switches; j++) {
> p_lash->cdg_vertex_matrix[i][j] =
> (cdg_vertex_t **) malloc(num_switches *
> @@ -763,7 +779,7 @@ static int init_lash_structures(lash_t * p_lash)
> for (i = 0; i < num_switches; i++) {
> for (j = 0; j < num_switches; j++) {
> p_lash->virtual_location[i][j] =
> - (int *)malloc(vl_min * sizeof(int *));
> + (int *)calloc(max_vl, sizeof(int *));
> if (p_lash->virtual_location[i][j] == NULL)
> goto Exit_Mem_Error;
> for (k = 0; k < vl_min; k++) {
> @@ -804,10 +820,12 @@ static int lash_core(lash_t * p_lash)
> int cycle_found2 = 0;
> int status = 0;
> int *switch_bitmap = NULL; /* Bitmap to check if we have processed
> this pair */
> + int max_vl;
> + osm_subn_opt_t *opt = &p_lash->p_osm->subn.opt;
>
> OSM_LOG_ENTER(p_log);
>
> - if (p_lash->p_osm->subn.opt.do_mesh_analysis &&
> osm_do_mesh_analysis(p_lash)) {
> + if (opt->do_mesh_analysis && osm_do_mesh_analysis(p_lash)) {
> OSM_LOG(p_log, OSM_LOG_ERROR, "Mesh analysis failed\n");
> goto Exit;
> }
> @@ -838,11 +856,14 @@ static int lash_core(lash_t * p_lash)
> }
>
> for (i = 0; i < num_switches; i++) {
> - for (dest_switch = 0; dest_switch < num_switches; dest_switch++)
> + for (dest_switch = 0; dest_switch < num_switches;
> dest_switch++) {
> + max_vl = lanes_needed + opt->lash_start_vl;
> + if (max_vl > IB_MAX_NUM_VLS)
> + max_vl = IB_MAX_NUM_VLS;
Shouldn't comparison be done against lsh->vl_min (number VLs available
in a fabric)? And return an error in case if it is exceeded (which means
LASH requires more VLs than available)?
> if (dest_switch != i && switch_bitmap[i * num_switches
> + dest_switch] == 0) {
> - v_lane = 0;
> + v_lane = opt->lash_start_vl;
> stop = 0;
> - while (v_lane < lanes_needed && stop == 0) {
> + while (v_lane < max_vl && stop == 0) {
> generate_cdg_for_sp(p_lash, i,
> dest_switch, v_lane);
> generate_cdg_for_sp(p_lash,
> dest_switch, i, v_lane);
>
> @@ -906,7 +927,9 @@ static int lash_core(lash_t * p_lash)
> switches[dest_switch]->routing_table[i].lane =
> v_lane;
>
> if (cycle_found == 1 || cycle_found2 == 1) {
> - if (++lanes_needed > p_lash->vl_min)
> + lanes_needed++;
> + if (lanes_needed > p_lash->vl_min ||
> + opt->lash_start_vl + lanes_needed -
> 1 >= IB_DROP_VL)
Shouldn't be just
if (start_vl + lanes_needed > p_lash->vl_min)
?
> goto Error_Not_Enough_Lanes;
>
> generate_cdg_for_sp(p_lash, i,
> dest_switch, v_lane);
> @@ -926,19 +949,24 @@ static int lash_core(lash_t * p_lash)
> switch_bitmap[i * num_switches + dest_switch] =
> 1;
> switch_bitmap[dest_switch * num_switches + i] =
> 1;
> }
> + }
> }
>
> OSM_LOG(p_log, OSM_LOG_INFO,
> "Lanes needed: %d, Balancing\n", lanes_needed);
>
> - for (i = 0; i < lanes_needed; i++) {
> + max_vl = lanes_needed + opt->lash_start_vl;
> + if (max_vl > IB_MAX_NUM_VLS)
Shouldn't this be compared with p_lash->vl_min (which is maximum number
of VLs available in a fabric) and not with IB_MAX_NUM_VLS?
> + max_vl = IB_MAX_NUM_VLS;
When max_vl is larger than number of available VLs (IOW lash needs more
VLs than fabric has) shouldn't it fallback to another routing algorithm?
> +
> + for (i = opt->lash_start_vl; i < max_vl; i++) {
> OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n",
> i, p_lash->num_mst_in_lane[i]);
> }
>
> balance_virtual_lanes(p_lash, lanes_needed);
>
> - for (i = 0; i < lanes_needed; i++) {
> + for (i = opt->lash_start_vl; i < max_vl; i++) {
> OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n",
> i, p_lash->num_mst_in_lane[i]);
> }
> @@ -1271,6 +1299,7 @@ uint8_t osm_get_lash_sl(osm_opensm_t * p_osm, const
> osm_port_t * p_src_port,
> unsigned dst_id;
> unsigned src_id;
> osm_switch_t *p_sw;
> + osm_subn_opt_t *opt = &p_osm->subn.opt;
>
> if (p_osm->routing_engine_used != OSM_ROUTING_ENGINE_TYPE_LASH)
> return OSM_DEFAULT_SL;
> @@ -1286,7 +1315,7 @@ uint8_t osm_get_lash_sl(osm_opensm_t * p_osm, const
> osm_port_t * p_src_port,
>
> src_id = get_lash_id(p_sw);
> if (src_id == dst_id)
> - return OSM_DEFAULT_SL;
> + return opt->lash_start_vl;
Is this correct? As far as I understand this is SL for paths between
CA ports connected to the same switch (which should be safe in sense of
credit loops). Should lash be involved here?
Sasha
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general
To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general