Hi Al,

Sorry about the huge delay. Some questions are below.

On 14:12 Thu 10 Apr     , Al Chu wrote:
> This is the primary patch that fiddles with the path recommendation
> code.  A few notes:
> 
> 1) b/c I want to keep track of how many remote destinations there can
> be, the 'remote_guids' array now stores all remote destinations, not
> just the ones we have already forwarded to.
> 
> 2) b/c I may need to free memory, I now "goto Exit" instead of just
> calling 'return' many times.
> 
> 3) Although the option is called 'port_offsetting', I actually "offset"
> both the remote destination I send to and the port pointing towards that
> remote destination.
> 
> Al
> 
> -- 
> Albert Chu
> [EMAIL PROTECTED]
> 925-422-5311
> Computer Scientist
> High Performance Systems Division
> Lawrence Livermore National Laboratory

> From 57eb4d9bf55fbbbf39dc1c7ddfeeb2cae4776ef0 Mon Sep 17 00:00:00 2001
> From: Albert L. Chu <[EMAIL PROTECTED]>
> Date: Thu, 20 Mar 2008 16:23:13 -0700
> Subject: [PATCH] implement port_offsetting
> 
> 
> Signed-off-by: Albert L. Chu <[EMAIL PROTECTED]>
> ---
>  opensm/include/opensm/osm_switch.h |    7 +-
>  opensm/opensm/osm_dump.c           |    3 +-
>  opensm/opensm/osm_switch.c         |  246 
> ++++++++++++++++++++++++++++++++----
>  opensm/opensm/osm_ucast_mgr.c      |   14 ++-
>  4 files changed, 239 insertions(+), 31 deletions(-)
> 
> diff --git a/opensm/include/opensm/osm_switch.h 
> b/opensm/include/opensm/osm_switch.h
> index 2624d5f..45f4718 100644
> --- a/opensm/include/opensm/osm_switch.h
> +++ b/opensm/include/opensm/osm_switch.h
> @@ -997,7 +997,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const 
> p_sw,
>                         IN const boolean_t dor,
>                         IN OUT osm_switch_guid_count_t * remote_guids,
>                         IN OUT uint16_t * p_num_remote_guids,
> -                       IN OUT osm_switch_guid_count_t ** 
> p_remote_guid_count_used);
> +                       IN OUT osm_switch_guid_count_t ** 
> p_remote_guid_count_used,
> +                       IN uint16_t port_offsetting_lids_per_port);
>  /*
>  * PARAMETERS
>  *    p_sw
> @@ -1031,6 +1032,10 @@ osm_switch_recommend_path(IN const osm_switch_t * 
> const p_sw,
>  *            [in out] The specific osm_switch_guid_count_t used
>  *            in switch recommendations.
>  *
> +*    port_offsetting_lids_per_port
> +*            [in] If > 0, indicates lids_per_port to use with
> +*            port_offsetting option.
> +*
>  * RETURN VALUE
>  *    Returns the recommended port on which to route this LID.
>  *
> diff --git a/opensm/opensm/osm_dump.c b/opensm/opensm/osm_dump.c
> index 2bac75a..37842ac 100644
> --- a/opensm/opensm/osm_dump.c
> +++ b/opensm/opensm/osm_dump.c
> @@ -219,7 +219,8 @@ static void dump_ucast_routes(cl_map_item_t *p_map_item, 
> FILE *file, void *cxt)
>                       /* No LMC Optimization */
>                       best_port = osm_switch_recommend_path(p_sw, p_port,
>                                                             lid_ho, TRUE, dor,
> -                                                           NULL, NULL, NULL);
> +                                                           NULL, NULL,
> +                                                           NULL, 0);
>                       fprintf(file, "No %u hop path possible via port %u!",
>                               best_hops, best_port);
>               }
> diff --git a/opensm/opensm/osm_switch.c b/opensm/opensm/osm_switch.c
> index f346b25..8e3342b 100644
> --- a/opensm/opensm/osm_switch.c
> +++ b/opensm/opensm/osm_switch.c
> @@ -55,6 +55,15 @@
>  #include <iba/ib_types.h>
>  #include <opensm/osm_switch.h>
>  
> +/* Local structs */
> +struct osm_switch_remote_dest {
> +     uint32_t total_paths;   
> +     uint32_t min_paths;
> +     uint8_t ports[256]; /* 256 b/c max_ports is a uint8_t */
> +     unsigned int ports_count;
> +     osm_switch_guid_count_t *p_remote_guid;
> +};
> +
>  /**********************************************************************
>   **********************************************************************/
>  cl_status_t
> @@ -304,9 +313,32 @@ osm_switch_find_guid_count(IN const osm_switch_t * const 
> p_sw,
>                                          1);
>  }
>  
> +/**********************************************************************
> + **********************************************************************/
> +
> +/* greatest common divisor */
> +static unsigned int
> +_gcd(unsigned int a, unsigned int b)
> +{
> +     unsigned int t;
> +     while (b != 0) {
> +             t = b;
> +             b = a % b;
> +             a = t;
> +     }
> +     return a;
> +}
> +
> +/* least common multiple */
> +static unsigned int
> +_lcm(unsigned int a, unsigned int b)
> +{
> +     return ((a*b) / _gcd(a,b));
> +}
>  
>  /**********************************************************************
>   **********************************************************************/
> +
>  uint8_t
>  osm_switch_recommend_path(IN const osm_switch_t * const p_sw,
>                         IN osm_port_t * p_port,
> @@ -315,7 +347,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const 
> p_sw,
>                         IN const boolean_t dor,
>                         IN OUT osm_switch_guid_count_t * remote_guids,
>                         IN OUT uint16_t * p_num_remote_guids,
> -                       IN OUT osm_switch_guid_count_t ** 
> p_remote_guid_count_used)
> +                       IN OUT osm_switch_guid_count_t ** 
> p_remote_guid_count_used,
> +                       IN uint16_t port_offsetting_lids_per_port)
>  {
>       /*
>          We support an enhanced LMC aware routing mode:
> @@ -356,6 +389,20 @@ osm_switch_recommend_path(IN const osm_switch_t * const 
> p_sw,
>       osm_node_t *p_rem_node;
>       osm_node_t *p_rem_node_first = NULL;
>       osm_switch_guid_count_t *p_remote_guid = NULL;
> +     /*
> +        These vars track information for port offsetting.
> +      */
> +     boolean_t port_offsetting = remote_guids && p_num_remote_guids
> +             && p_remote_guid_count_used && port_offsetting_lids_per_port;
> +     struct osm_switch_remote_dest * remote_dests = NULL;
> +     struct osm_switch_remote_dest * p_remote_dest = NULL;
> +     uint32_t num_remote_dests = 0;
> +     uint32_t total_paths_count = 0;
> +     uint32_t num_potential_ports = 0;
> +     uint32_t lcm = 0;
> +     uint32_t indx = 0;
> +     boolean_t dest_found = FALSE;
> +     unsigned int i;
>  
>       CL_ASSERT(lid_ho > 0);
>  
> @@ -378,9 +425,22 @@ osm_switch_recommend_path(IN const osm_switch_t * const 
> p_sw,
>  
>       num_ports = p_sw->num_ports;
>  
> +     if (port_offsetting) {
> +             remote_dests = malloc(sizeof(struct osm_switch_remote_dest) * 
> num_ports);
> +             if (remote_dests == NULL) {
> +                     osm_log(p_sw->p_log, OSM_LOG_ERROR,
> +                             "osm_switch_recommend_path: "
> +                             "Cannot allocate array. Insufficient memory: "
> +                             "Disabling port_offsetting\n");
> +                     port_offsetting = 0;
> +             }
> +     }
> +
>       least_hops = osm_switch_get_least_hops(p_sw, base_lid);
> -     if (least_hops == OSM_NO_PATH)
> -             return (OSM_NO_PATH);
> +     if (least_hops == OSM_NO_PATH) {
> +             best_port = OSM_NO_PATH;
> +             goto Exit;
> +     }
>  
>       /*
>          First, inquire with the forwarding table for an existing
> @@ -417,8 +477,10 @@ osm_switch_recommend_path(IN const osm_switch_t * const 
> p_sw,
>                                  in the forwarding tables that he wants to be 
> overridden by the
>                                  minimum hop function.
>                                */
> -                             if (hops == least_hops)
> -                                     return (port_num);
> +                             if (hops == least_hops) {
> +                                     best_port = port_num;
> +                                     goto Exit;
> +                             }
>                       }
>               }
>       }

Maybe move the remote_dests allocation here, so as to minimize the
number of 'goto Exit' changes needed?

> @@ -475,7 +537,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const 
> p_sw,
>                                                                      
> port_num);
>  
>                       /* If not update the least hops for this case */
> -                     if (!p_remote_guid) {
> +                     if (!p_remote_guid
> +                         || !p_remote_guid->forwarded_to) {
>                               if (check_count < least_paths_other_sys) {
>                                       least_paths_other_sys = check_count;
>                                       best_port_other_sys = port_num;
> @@ -489,7 +552,8 @@ osm_switch_recommend_path(IN const osm_switch_t * const 
> p_sw,
>                                                                               
> port_num);
>  
>                               /* If not update the least hops for this case */
> -                             if (!p_remote_guid
> +                             if ((!p_remote_guid
> +                                  || !p_remote_guid->forwarded_to)
>                                   && check_count < least_paths_other_nodes) {
>                                       least_paths_other_nodes = check_count;
>                                       best_port_other_node = port_num;
> @@ -498,6 +562,50 @@ osm_switch_recommend_path(IN const osm_switch_t * const 
> p_sw,
>                               /* else prior sys and node guid already used */
>  
>                       }       /* same sys found */
> +
> +                     /* Store the new sys/node guid that we haven't seen yet 
> */
> +                     if (!p_remote_guid) {
> +                             p_rem_physp = osm_physp_get_remote(p_physp);
> +                             p_rem_node = 
> osm_physp_get_node_ptr(p_rem_physp);
> +                             
> memcpy(&(remote_guids[*p_num_remote_guids].sys_guid),
> +                                    &(p_rem_node->node_info.sys_guid),
> +                                    sizeof(uint64_t));
> +                             
> memcpy(&(remote_guids[*p_num_remote_guids].node_guid),
> +                                    &(p_rem_node->node_info.node_guid),
> +                                    sizeof(uint64_t));
> +                             remote_guids[*p_num_remote_guids].forwarded_to 
> = 0;
> +                             p_remote_guid = 
> &remote_guids[*p_num_remote_guids];
> +                             (*p_num_remote_guids)++;
> +                     }
> +             }
> +
> +             if (port_offsetting) {
> +                     /* Keep track of the destinations we've seen so far */
> +                     p_remote_dest = NULL;
> +                     for (i = 0; i < num_remote_dests; i++) {
> +                             if (!memcmp(p_remote_guid,
> +                                         (&remote_dests[i])->p_remote_guid,
> +                                         sizeof(struct 
> osm_switch_remote_dest))) {

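Shouldn't this be sizeof(osm_switch_guid_count_t)? Both memcmp() arguments
point to an osm_switch_guid_count_t, not a struct osm_switch_remote_dest.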

Sasha

> +                                     p_remote_dest = &remote_dests[i];
> +                                     break;
> +                             }
> +                     }
> +                     if (!p_remote_dest) {
> +                             p_remote_dest = &remote_dests[num_remote_dests];
> +                             p_remote_dest->p_remote_guid = p_remote_guid;
> +                             p_remote_dest->total_paths = 0;
> +                             p_remote_dest->min_paths = 0xFFFFFFFF;
> +                             p_remote_dest->ports_count = 0;
> +                             num_remote_dests++;
> +                     }
> +                     p_remote_dest->total_paths += check_count;
> +                     if (check_count < p_remote_dest->min_paths)
> +                             p_remote_dest->min_paths = check_count;
> +                     p_remote_dest->ports[p_remote_dest->ports_count] = 
> port_num;
> +                     p_remote_dest->ports_count++;
> +
> +                     total_paths_count += check_count;
> +                     num_potential_ports++;
>               }
>  
>               /* routing for LMC mode */
> @@ -523,21 +631,23 @@ osm_switch_recommend_path(IN const osm_switch_t * const 
> p_sw,
>                       best_port = port_num;
>                       least_paths = check_count;
>                       if (routing_for_lmc
> -                         && p_remote_guid
> +                         && p_remote_guid->forwarded_to
>                           && p_remote_guid->forwarded_to < least_forwarded_to)
>                               least_forwarded_to = 
> p_remote_guid->forwarded_to;
>               }
>               else if (routing_for_lmc
> -                      && p_remote_guid
>                        && check_count == least_paths
> +                      && p_remote_guid->forwarded_to
>                        && p_remote_guid->forwarded_to < least_forwarded_to) {
>                       least_forwarded_to = p_remote_guid->forwarded_to;
>                       best_port = port_num;
>               }
>       }
>  
> -     if (port_found == FALSE)
> -             return (OSM_NO_PATH);
> +     if (port_found == FALSE) {
> +             best_port = OSM_NO_PATH;
> +             goto Exit;
> +     }
>  
>       /*
>          if we are in enhanced routing mode and the best port is not
> @@ -555,24 +665,110 @@ osm_switch_recommend_path(IN const osm_switch_t * 
> const p_sw,
>                                                          remote_guids,
>                                                          p_num_remote_guids,
>                                                          best_port);
> +             /* Must be stored */
> +             CL_ASSERT(p_remote_guid);
> +             *p_remote_guid_count_used = p_remote_guid;
> +     }
>  
> -             if (!p_remote_guid) {
> -                     /* track the remote node and system of the port used. */
> -                     p_physp = osm_node_get_physp_ptr(p_sw->p_node, 
> best_port);
> -                     p_rem_physp = osm_physp_get_remote(p_physp);
> -                     p_rem_node = osm_physp_get_node_ptr(p_rem_physp);
> -                     memcpy(&(remote_guids[*p_num_remote_guids].sys_guid),
> -                            &(p_rem_node->node_info.sys_guid),
> -                            sizeof(uint64_t));
> -                     memcpy(&(remote_guids[*p_num_remote_guids].node_guid),
> -                                    &(p_rem_node->node_info.node_guid),
> -                            sizeof(uint64_t));
> -                     remote_guids[*p_num_remote_guids].forwarded_to = 0;
> -                     (*p_num_remote_guids)++;
> +     /*
> +      * As an example of what we're trying to do with port
> +      * offsetting, assume LMC = 2 and we are trying to route
> +      * the lids of 4 ports. The lids of these 4 ports are:
> +      *
> +      * (1,2,3,4)
> +      * (5,6,7,8)
> +      * (9,10,11,12)
> +      * (13,14,15,16)
> +      *
> +      * Suppose forwarding to all these lids goes through
> +      * 4 specific switch ports.  If we just cycle through
> +      * ports and lids in a normal iterative fashion, we would
> +      * normally forward out ports in this manner.
> +      *
> +      * switch port 1: 1, 5, 9, 13
> +      * switch port 2: 2, 6, 10, 14
> +      * switch port 3: 3, 7, 11, 15
> +      * switch port 4: 4, 8, 12, 16
> +      *
> +      * Note that the base lid of each port (lids 1, 5, 9, 13)
> +      * are all routed out of switch port 1.  Thus, if the user
> +      * only uses the base lid of each port, they will get pretty
> +      * bad performance.  We will try to get this layout instead.
> +      *
> +      * switch port 1: 1, 8, 11, 14
> +      * switch port 2: 2, 5, 12, 15
> +      * switch port 3: 3, 6, 9,  16
> +      * switch port 4: 4, 7, 10, 13
> +      *
> +      * where switch ports are distributed in a more even manner.
> +      * The base lid of each port is now distributed evenly
> +      * across all 4 switch ports.  The remaining lids are still
> +      * distributed evenly across all the remaining switch ports.
> +      *
> +      * In order to accomplish this, we (effectively) will iterate
> +      * through all ports like before, but instead of iterating from
> +      * 0 to N-1 all the time, we will select the starting index
> +      * based on the number of paths we have routed thus far.
> +      */
> +
> +     /* We will not do port offsetting if num_potential_ports == 1
> +      * b/c there is no offsetting that can be done.
> +      */
> +     if (port_offsetting
> +         && best_port
> +         && num_potential_ports > 1) {
> +             /* Select which destination we want to forward to with our
> +              * offsetting loop.
> +              */
> +             lcm = _lcm(port_offsetting_lids_per_port,
> +                        num_remote_dests);
> +             indx = (total_paths_count / lcm) % num_remote_dests;
> +             for (i = 0; i < num_remote_dests; i++) {
> +                     p_remote_dest = &remote_dests[indx];
> +                     p_remote_guid = p_remote_dest->p_remote_guid;
> +
> +                     if (p_remote_guid->forwarded_to == least_forwarded_to
> +                         && p_remote_dest->min_paths == least_paths) {
> +                             dest_found = TRUE;
> +                             break;
> +                     }
> +
> +                     indx++;
> +                     if (indx >= num_remote_dests)
> +                             indx = 0;
> +             }
> +
> +             /* Then we "offset" within the potential ports we could
> +              * forward out of for this specific destination.
> +              */
> +             if (dest_found && p_remote_dest) {
> +                     lcm = _lcm(port_offsetting_lids_per_port,
> +                                p_remote_dest->ports_count);
> +                     indx = (p_remote_dest->total_paths / lcm) % 
> p_remote_dest->ports_count;
> +                     for (i = 0; i < p_remote_dest->ports_count; i++) {
> +                             port_num = p_remote_dest->ports[indx];
> +                             check_count =
> +                                     
> osm_port_prof_path_count_get(&p_sw->p_prof[port_num]);
> +                             if (check_count == least_paths) {
> +                                     if (best_port != port_num)
> +                                             osm_log(p_sw->p_log, 
> OSM_LOG_DEBUG,
> +                                                     
> "osm_switch_recommend_path: "
> +                                                     "best port offsetted: 
> %d to %d\n",
> +                                                     best_port, port_num);
> +                                     best_port = port_num;
> +                                     *p_remote_guid_count_used = 
> p_remote_dest->p_remote_guid;
> +                                     break;
> +                             }
> +                             indx++;
> +                             if (indx >= p_remote_dest->ports_count)
> +                                     indx = 0;
> +                     }
>               }
> -             *p_remote_guid_count_used = p_remote_guid;
>       }
>  
> +Exit:
> +     if (remote_dests)
> +             free(remote_dests);
>       return (best_port);
>  }
>  
> diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c
> index 938db84..501c2c7 100644
> --- a/opensm/opensm/osm_ucast_mgr.c
> +++ b/opensm/opensm/osm_ucast_mgr.c
> @@ -212,18 +212,23 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const 
> p_mgr,
>       osm_switch_guid_count_t *remote_guids = NULL;
>       uint16_t num_used_guids = 0;
>       osm_switch_guid_count_t *p_remote_guid_used = NULL;
> +     uint16_t port_offsetting_lids_per_port = 0;
>  
>       OSM_LOG_ENTER(p_mgr->p_log);
>  
>       if (lids_per_port > 1) {
> -             remote_guids = malloc(sizeof(osm_switch_guid_count_t) * 
> lids_per_port);
> +             uint8_t num_ports = p_sw->num_ports;
> +             remote_guids = malloc(sizeof(osm_switch_guid_count_t) * 
> num_ports);
>               if (remote_guids == NULL) {
>                       osm_log(p_mgr->p_log, OSM_LOG_ERROR,
>                               "__osm_ucast_mgr_process_port: ERR 3A09: "
>                               "Cannot allocate array. Insufficient memory\n");
>                       goto Exit;
>               }
> -             memset(remote_guids, 0, sizeof(osm_switch_guid_count_t) * 
> lids_per_port);
> +             memset(remote_guids, 0, sizeof(osm_switch_guid_count_t) * 
> num_ports);
> +
> +             if (p_mgr->p_subn->opt.port_offsetting)
> +                     port_offsetting_lids_per_port = lids_per_port;
>       }
>  
>       osm_port_get_lid_range_ho(p_port, &min_lid_ho, &max_lid_ho);
> @@ -270,14 +275,15 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const 
> p_mgr,
>                                                        p_mgr->is_dor,
>                                                        remote_guids,
>                                                        &num_used_guids,
> -                                                      &p_remote_guid_used);
> +                                                      &p_remote_guid_used,
> +                                                      
> port_offsetting_lids_per_port);
>               }
>               else
>                       port = osm_switch_recommend_path(p_sw, p_port, lid_ho,
>                                                        p_mgr->p_subn->
>                                                        ignore_existing_lfts,
>                                                        p_mgr->is_dor,
> -                                                      NULL, NULL, NULL);
> +                                                      NULL, NULL, NULL, 0);
>  
>               /*
>                  There might be no path to the target
> -- 
> 1.5.1
> 

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to