Hi Hal,

On 12:41 Fri 07 Aug     , Hal Rosenstock wrote:
> 
> Similar to previous patch to "Parallelize (Stripe) LFT sets across switches".
> Currently, MADs are pipelined to a single switch first which effectively
> serializes these requests. This patch pipelines the MFT set MADs across
> switches first (before cycling to the next MFT block) so that multiple
> switches can be responding concurrently. Speedup is dependent on number
> of MFT blocks in use (number of MLIDs) which is dependent on the number
> of multicast groups.
> 
> Signed-off-by: Hal Rosenstock <[email protected]>
> ---
> diff --git a/opensm/include/opensm/osm_switch.h 
> b/opensm/include/opensm/osm_switch.h
> index 7ce28c5..e281842 100644
> --- a/opensm/include/opensm/osm_switch.h
> +++ b/opensm/include/opensm/osm_switch.h
> @@ -1,6 +1,6 @@
>  /*
>   * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> - * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
> + * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   *
>   * This software is available to you under a choice of one of two
> @@ -103,6 +103,8 @@ typedef struct osm_switch {
>       uint8_t *lft;
>       uint8_t *new_lft;
>       osm_mcast_tbl_t mcast_tbl;
> +     uint32_t mft_block_num;
> +     uint32_t mft_position;
>       unsigned endport_links;
>       unsigned need_update;
>       void *priv;
> diff --git a/opensm/opensm/osm_mcast_mgr.c b/opensm/opensm/osm_mcast_mgr.c
> index 4dbbaa0..f91c6b6 100644
> --- a/opensm/opensm/osm_mcast_mgr.c
> +++ b/opensm/opensm/osm_mcast_mgr.c
> @@ -1,6 +1,6 @@
>  /*
>   * Copyright (c) 2004-2008 Voltaire, Inc. All rights reserved.
> - * Copyright (c) 2002-2006 Mellanox Technologies LTD. All rights reserved.
> + * Copyright (c) 2002-2009 Mellanox Technologies LTD. All rights reserved.
>   * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
>   * Copyright (c) 2008 Xsigo Systems Inc.  All rights reserved.
>   *
> @@ -325,15 +325,12 @@ static int mcast_mgr_set_tbl(osm_sm_t * sm, IN 
> osm_switch_t * p_sw)
>  {
>       osm_node_t *p_node;
>       osm_dr_path_t *p_path;
> -     osm_madw_context_t mad_context;
> +     osm_madw_context_t context;
>       ib_api_status_t status;
> -     uint32_t block_id_ho = 0;
> -     int16_t block_num = 0;
> -     uint32_t position = 0;
> -     uint32_t max_position;
> +     uint32_t block_id_ho;
>       osm_mcast_tbl_t *p_tbl;
>       ib_net16_t block[IB_MCAST_BLOCK_SIZE];
> -     int ret = 0;
> +     int ret = -1;
>  
>       CL_ASSERT(sm);
>  
> @@ -353,36 +350,34 @@ static int mcast_mgr_set_tbl(osm_sm_t * sm, IN 
> osm_switch_t * p_sw)
>          configuration.
>        */
>  
> -     mad_context.mft_context.node_guid = osm_node_get_node_guid(p_node);
> -     mad_context.mft_context.set_method = TRUE;
> +     context.mft_context.node_guid = osm_node_get_node_guid(p_node);
> +     context.mft_context.set_method = TRUE;
>  
>       p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
> -     max_position = p_tbl->max_position;
>  
> -     while (osm_mcast_tbl_get_block(p_tbl, block_num,
> -                                    (uint8_t) position, block)) {
> -             OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
> -                     "Writing MFT block 0x%X\n", block_id_ho);
> +     if (p_sw->mft_position <= p_tbl->max_position &&
> +         osm_mcast_tbl_get_block(p_tbl, p_sw->mft_block_num,
> +                                 (uint8_t) p_sw->mft_position, block)) {
> +
> +             block_id_ho = p_sw->mft_block_num + (p_sw->mft_position << 28);
>  
> -             block_id_ho = block_num + (position << 28);
> +             OSM_LOG(sm->p_log, OSM_LOG_DEBUG,
> +                     "Writing MFT block %u position %u to switch 0x%" PRIx64 
> "\n",
> +                     p_sw->mft_block_num, p_sw->mft_position,
> +                     cl_ntoh64(context.lft_context.node_guid));
>  
>               status = osm_req_set(sm, p_path, (void *)block, sizeof(block),
>                                    IB_MAD_ATTR_MCAST_FWD_TBL,
>                                    cl_hton32(block_id_ho), CL_DISP_MSGID_NONE,
> -                                  &mad_context);
> +                                  &context);
>  
> -             if (status != IB_SUCCESS) {
> +             if (status != IB_SUCCESS)
>                       OSM_LOG(sm->p_log, OSM_LOG_ERROR, "ERR 0A02: "
> -                             "Sending multicast fwd. tbl. block failed 
> (%s)\n",
> +                             "Sending MFT block failed (%s)\n",
>                               ib_get_err_str(status));
> -                     ret = -1;
> -             }
>  
> -             if (++position > max_position) {
> -                     position = 0;
> -                     block_num++;
> -             }
> -     }
> +     } else
> +             ret = 0;
>  
>       OSM_LOG_EXIT(sm->p_log);
>       return ret;
> @@ -1077,7 +1072,8 @@ int osm_mcast_mgr_process(osm_sm_t * sm)
>       cl_qmap_t *p_sw_tbl;
>       cl_qlist_t *p_list = &sm->mgrp_list;
>       osm_mgrp_t *p_mgrp;
> -     int i, ret = 0;
> +     osm_mcast_tbl_t *p_tbl;
> +     int sws_notdone, i, ret = 0;
>  
>       OSM_LOG_ENTER(sm->p_log);
>  
> @@ -1114,11 +1110,30 @@ int osm_mcast_mgr_process(osm_sm_t * sm)
>        */
>       p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
>       while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) {
> -             if (mcast_mgr_set_tbl(sm, p_sw))
> -                     ret = -1;
> +             p_sw->mft_block_num = 0;
> +             p_sw->mft_position = 0;
>               p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
>       }
>  
> +     while (1) {
> +             p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
> +             sws_notdone = 0;
> +             while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) {
> +                     if (mcast_mgr_set_tbl(sm, p_sw))
> +                             sws_notdone++;
> +                     p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
> +                     if (++p_sw->mft_position > p_tbl->max_position) {
> +                             p_sw->mft_position = 0;
> +                             p_sw->mft_block_num++;
> +                     }
> +                     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
> +             }
> +             if (!sws_notdone) {
> +                     ret = -1;
> +                     break;
> +             }

So osm_mcast_mgr_process() will always return -1 value? Why?

> +     }
> +
>       while (!cl_is_qlist_empty(p_list)) {
>               cl_list_item_t *p = cl_qlist_remove_head(p_list);
>               free(p);
> @@ -1142,9 +1157,10 @@ int osm_mcast_mgr_process_mgroups(osm_sm_t * sm)
>       osm_switch_t *p_sw;
>       cl_qmap_t *p_sw_tbl;
>       osm_mgrp_t *p_mgrp;
> +     osm_mcast_tbl_t *p_tbl;
>       ib_net16_t mlid;
>       osm_mcast_mgr_ctxt_t *ctx;
> -     int ret = 0;
> +     int sws_notdone, ret = 0;
>  
>       OSM_LOG_ENTER(sm->p_log);
>  
> @@ -1195,11 +1211,30 @@ int osm_mcast_mgr_process_mgroups(osm_sm_t * sm)
>       p_sw_tbl = &sm->p_subn->sw_guid_tbl;
>       p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
>       while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) {
> -             if (mcast_mgr_set_tbl(sm, p_sw))
> -                     ret = -1;
> +             p_sw->mft_block_num = 0;
> +             p_sw->mft_position = 0;
>               p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
>       }
>  
> +     while (1) {
> +             p_sw = (osm_switch_t *) cl_qmap_head(p_sw_tbl);
> +             sws_notdone = 0;
> +             while (p_sw != (osm_switch_t *) cl_qmap_end(p_sw_tbl)) {
> +                     if (mcast_mgr_set_tbl(sm, p_sw))
> +                             sws_notdone++;
> +                     p_tbl = osm_switch_get_mcast_tbl_ptr(p_sw);
> +                     if (++p_sw->mft_position > p_tbl->max_position) {
> +                             p_sw->mft_position = 0;
> +                             p_sw->mft_block_num++;
> +                     }
> +                     p_sw = (osm_switch_t *) cl_qmap_next(&p_sw->map_item);
> +             }
> +             if (!sws_notdone) {
> +                     ret = -1;
> +                     break;

Ditto.

> +             }
> +     }
> +

Could you consolidate this code which is equivalent with one in
osm_mcast_mgr_process() in single function say
mcast_mgr_set_mftables()?

Also similar to LFTs case it would be nice to simplify this tables setup
loop.

Sasha

>       osm_dump_mcast_routes(sm->p_subn->p_osm);
>  
>  exit:
> 
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to