Hi Hal,

On 10:14 Thu 18 Jun     , Hal Rosenstock wrote:
> 
> rather than starting from VL 0
> 
> Signed-off-by: Robert Pearson <[email protected]>
> Signed-off-by: Hal Rosenstock <[email protected]>

See the comments below.

<snip...>

> diff --git a/opensm/opensm/osm_ucast_lash.c b/opensm/opensm/osm_ucast_lash.c
> index 12b5e34..540214c 100644
> --- a/opensm/opensm/osm_ucast_lash.c
> +++ b/opensm/opensm/osm_ucast_lash.c
> @@ -478,7 +478,7 @@ static void balance_virtual_lanes(lash_t * p_lash, 
> unsigned lanes_needed)
>       cdg_vertex_t ****cdg_vertex_matrix = p_lash->cdg_vertex_matrix;
>       int *num_mst_in_lane = p_lash->num_mst_in_lane;
>       int ***virtual_location = p_lash->virtual_location;
> -     int min_filled_lane, max_filled_lane, trials;
> +     int min_filled_lane, max_filled_lane, trials, max_vl;
>       int old_min_filled_lane, old_max_filled_lane, new_num_min_lane,
>           new_num_max_lane;
>       unsigned int i, j;
> @@ -486,9 +486,13 @@ static void balance_virtual_lanes(lash_t * p_lash, 
> unsigned lanes_needed)
>       int next_switch2, output_link2;
>       int stop = 0, cycle_found;
>       int cycle_found2;
> +     osm_subn_opt_t *opt = &p_lash->p_osm->subn.opt;

In this function 'opt' is used only to get 'lash_start_vl', so wouldn't
it be better to do something like:

        unsigned start_vl = &p_lash->p_osm->subn.opt.lash_start_vl;
?

This is relevant for all other similar places in the patch.

>  
> -     max_filled_lane = 0;
> -     min_filled_lane = lanes_needed - 1;
> +     max_filled_lane = opt->lash_start_vl;
> +     max_vl = lanes_needed + opt->lash_start_vl;
> +     if (max_vl > IB_MAX_NUM_VLS)
> +             max_vl = IB_MAX_NUM_VLS;

Isn't this case where LASH requires more VLs than available in IB?

> +     min_filled_lane = max_vl - 1;
>  
>       trials = num_mst_in_lane[max_filled_lane];
>       if (lanes_needed == 1)
> @@ -590,7 +594,7 @@ static void balance_virtual_lanes(lash_t * p_lash, 
> unsigned lanes_needed)
>               new_num_min_lane = MAX_INT;
>               new_num_max_lane = 0;
>  
> -             for (i = 0; i < lanes_needed; i++) {
> +             for (i = opt->lash_start_vl; i < max_vl; i++) {
>  
>                       if (num_mst_in_lane[i] < new_num_min_lane) {
>                               new_num_min_lane = num_mst_in_lane[i];
> @@ -673,12 +677,18 @@ static void free_lash_structures(lash_t * p_lash)
>  {
>       unsigned int i, j, k;
>       unsigned num_switches = p_lash->num_switches;
> +     int max_vl;
>       osm_log_t *p_log = &p_lash->p_osm->log;
> +     osm_subn_opt_t *opt = &p_lash->p_osm->subn.opt;
>  
>       OSM_LOG_ENTER(p_log);
>  
> +     max_vl = opt->lash_start_vl + p_lash->vl_min;
> +     if (max_vl > IB_MAX_NUM_VLS)
> +             max_vl = IB_MAX_NUM_VLS;
> +
>       /* free cdg_vertex_matrix */
> -     for (i = 0; i < p_lash->vl_min; i++) {
> +     for (i = opt->lash_start_vl; i < max_vl; i++) {
>               for (j = 0; j < num_switches; j++) {
>                       for (k = 0; k < num_switches; k++)
>                               if (p_lash->cdg_vertex_matrix[i][j][k])
> @@ -715,13 +725,19 @@ static int init_lash_structures(lash_t * p_lash)
>       osm_log_t *p_log = &p_lash->p_osm->log;
>       int status = 0;
>       unsigned int i, j, k;
> +     int max_vl;
> +     osm_subn_opt_t *opt = &p_lash->p_osm->subn.opt;
>  
>       OSM_LOG_ENTER(p_log);
>  
> +     max_vl = vl_min + opt->lash_start_vl;

I'm not following...

vl_min is total number of VLs available in a fabric, lash_start_vl is
VL number where LASH may start to use VLs, so VLs available for LASH are
lash_start_vl..vl_min. Assuming so what does 'max_vl = vl_min +
lash_start_vl' represent? It is always overflow in terms of available
VLs, no?

> +     if (max_vl > IB_MAX_NUM_VLS)
> +             max_vl = IB_MAX_NUM_VLS;
> +
>       /* initialise 
> cdg_vertex_matrix[num_switches][num_switches][num_switches] */
>       p_lash->cdg_vertex_matrix =
> -         (cdg_vertex_t ****) malloc(vl_min * sizeof(cdg_vertex_t ****));
> -     for (i = 0; i < vl_min; i++) {
> +         (cdg_vertex_t ****) calloc(max_vl, sizeof(cdg_vertex_t ****));
> +     for (i = opt->lash_start_vl; i < max_vl; i++) {
>               p_lash->cdg_vertex_matrix[i] =
>                   (cdg_vertex_t ***) malloc(num_switches *
>                                             sizeof(cdg_vertex_t ***));
> @@ -730,7 +746,7 @@ static int init_lash_structures(lash_t * p_lash)
>                       goto Exit_Mem_Error;
>       }
>  
> -     for (i = 0; i < vl_min; i++) {
> +     for (i = opt->lash_start_vl; i < max_vl; i++) {
>               for (j = 0; j < num_switches; j++) {
>                       p_lash->cdg_vertex_matrix[i][j] =
>                           (cdg_vertex_t **) malloc(num_switches *
> @@ -763,7 +779,7 @@ static int init_lash_structures(lash_t * p_lash)
>       for (i = 0; i < num_switches; i++) {
>               for (j = 0; j < num_switches; j++) {
>                       p_lash->virtual_location[i][j] =
> -                         (int *)malloc(vl_min * sizeof(int *));
> +                         (int *)calloc(max_vl, sizeof(int *));
>                       if (p_lash->virtual_location[i][j] == NULL)
>                               goto Exit_Mem_Error;
>                       for (k = 0; k < vl_min; k++) {
> @@ -804,10 +820,12 @@ static int lash_core(lash_t * p_lash)
>       int cycle_found2 = 0;
>       int status = 0;
>       int *switch_bitmap = NULL;      /* Bitmap to check if we have processed 
> this pair */
> +     int max_vl;
> +     osm_subn_opt_t *opt = &p_lash->p_osm->subn.opt;
>  
>       OSM_LOG_ENTER(p_log);
>  
> -     if (p_lash->p_osm->subn.opt.do_mesh_analysis && 
> osm_do_mesh_analysis(p_lash)) {
> +     if (opt->do_mesh_analysis && osm_do_mesh_analysis(p_lash)) {
>               OSM_LOG(p_log, OSM_LOG_ERROR, "Mesh analysis failed\n");
>               goto Exit;
>       }
> @@ -838,11 +856,14 @@ static int lash_core(lash_t * p_lash)
>       }
>  
>       for (i = 0; i < num_switches; i++) {
> -             for (dest_switch = 0; dest_switch < num_switches; dest_switch++)
> +             for (dest_switch = 0; dest_switch < num_switches; 
> dest_switch++) {
> +                     max_vl = lanes_needed + opt->lash_start_vl;
> +                     if (max_vl > IB_MAX_NUM_VLS)
> +                             max_vl = IB_MAX_NUM_VLS;

Shouldn't comparison be done against lsh->vl_min (number VLs available
in a fabric)? And return an error in case if it is exceeded (which means
LASH requires more VLs than available)?

>                       if (dest_switch != i && switch_bitmap[i * num_switches 
> + dest_switch] == 0) {
> -                             v_lane = 0;
> +                             v_lane = opt->lash_start_vl;
>                               stop = 0;
> -                             while (v_lane < lanes_needed && stop == 0) {
> +                             while (v_lane < max_vl && stop == 0) {
>                                       generate_cdg_for_sp(p_lash, i, 
> dest_switch, v_lane);
>                                       generate_cdg_for_sp(p_lash, 
> dest_switch, i, v_lane);
>  
> @@ -906,7 +927,9 @@ static int lash_core(lash_t * p_lash)
>                               switches[dest_switch]->routing_table[i].lane = 
> v_lane;
>  
>                               if (cycle_found == 1 || cycle_found2 == 1) {
> -                                     if (++lanes_needed > p_lash->vl_min)
> +                                     lanes_needed++;
> +                                     if (lanes_needed > p_lash->vl_min ||
> +                                         opt->lash_start_vl + lanes_needed - 
> 1 >= IB_DROP_VL)

Shouldn't be just

        if (start_vl + lanes_needed > p_lash->vl_min)

?

>                                               goto Error_Not_Enough_Lanes;
>  
>                                       generate_cdg_for_sp(p_lash, i, 
> dest_switch, v_lane);
> @@ -926,19 +949,24 @@ static int lash_core(lash_t * p_lash)
>                               switch_bitmap[i * num_switches + dest_switch] = 
> 1;
>                               switch_bitmap[dest_switch * num_switches + i] = 
> 1;
>                       }
> +             }
>       }
>  
>       OSM_LOG(p_log, OSM_LOG_INFO,
>               "Lanes needed: %d, Balancing\n", lanes_needed);
>  
> -     for (i = 0; i < lanes_needed; i++) {
> +     max_vl = lanes_needed + opt->lash_start_vl;
> +     if (max_vl > IB_MAX_NUM_VLS)

Shouldn't this be compared with p_lash->vl_min (which is maximum number
of VLs available in a fabric) and not with IB_MAX_NUM_VLS?

> +             max_vl = IB_MAX_NUM_VLS;

When max_vl is larger than number of available VLs (IOW lash needs more
VLs than fabric has) shouldn't it fallback to another routing algorithm?

> +
> +     for (i = opt->lash_start_vl; i < max_vl; i++) {
>               OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n",
>                       i, p_lash->num_mst_in_lane[i]);
>       }
>  
>       balance_virtual_lanes(p_lash, lanes_needed);
>  
> -     for (i = 0; i < lanes_needed; i++) {
> +     for (i = opt->lash_start_vl; i < max_vl; i++) {
>               OSM_LOG(p_log, OSM_LOG_INFO, "Lanes in layer %d: %d\n",
>                       i, p_lash->num_mst_in_lane[i]);
>       }
> @@ -1271,6 +1299,7 @@ uint8_t osm_get_lash_sl(osm_opensm_t * p_osm, const 
> osm_port_t * p_src_port,
>       unsigned dst_id;
>       unsigned src_id;
>       osm_switch_t *p_sw;
> +     osm_subn_opt_t *opt = &p_osm->subn.opt;
>  
>       if (p_osm->routing_engine_used != OSM_ROUTING_ENGINE_TYPE_LASH)
>               return OSM_DEFAULT_SL;
> @@ -1286,7 +1315,7 @@ uint8_t osm_get_lash_sl(osm_opensm_t * p_osm, const 
> osm_port_t * p_src_port,
>  
>       src_id = get_lash_id(p_sw);
>       if (src_id == dst_id)
> -             return OSM_DEFAULT_SL;
> +             return opt->lash_start_vl;

Is this correct? As far as I understand this is SL for paths between
CA ports connected to the same switch (which should be safe in sense of
credit loops). Should lash be involved here?

Sasha
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to