Hey Sasha,

Comments inlined.

On Sun, 2008-09-28 at 23:42 +0300, Sasha Khapyorsky wrote:
> From: Albert Chu <[EMAIL PROTECTED]>
> 
> Routing chaining is the ability to configure the order in which routing
> algorithms are applied in opensm, i.e.
> 
>       -R ftree,updn,minhop
> 
> Try using ftree routing. If ftree fails, try updn. If updn fails, try
> minhop.
> 
> In order to get this done, some rearchitecture of the routing code had
> to be done b/c there is no longer an assumption that only one routing
> engine can be specified.
> 
> Always setup a routing engine, assume no default "fallthrough" minhop
> routing engine.  On configured routing engine failure, do minhop as
> a last resort. Stick a *next pointer into struct osm_routing_engine.
> Rearchitect routing engine usage as a list instead of a single struct.
> 
> Signed-off-by: Sasha Khapyorsky <[EMAIL PROTECTED]>
> ---
>  opensm/include/opensm/osm_opensm.h    |   10 ++-
>  opensm/include/opensm/osm_subnet.h    |    7 +-
>  opensm/include/opensm/osm_ucast_mgr.h |    2 +-
>  opensm/man/opensm.8.in                |    8 ++-
>  opensm/opensm/main.c                  |   10 ++-
>  opensm/opensm/osm_opensm.c            |  121 +++++++++++++++++++++++----------
>  opensm/opensm/osm_subnet.c            |   11 ++-
>  opensm/opensm/osm_ucast_file.c        |   19 ++---
>  opensm/opensm/osm_ucast_ftree.c       |   35 ++++------
>  opensm/opensm/osm_ucast_lash.c        |   16 ++--
>  opensm/opensm/osm_ucast_mgr.c         |  119 +++++++++++++++++++++-----------
>  opensm/opensm/osm_ucast_updn.c        |   10 ++--
>  12 files changed, 226 insertions(+), 142 deletions(-)
> 
> diff --git a/opensm/include/opensm/osm_opensm.h b/opensm/include/opensm/osm_opensm.h
> index 5d45724..c121be4 100644
> --- a/opensm/include/opensm/osm_opensm.h
> +++ b/opensm/include/opensm/osm_opensm.h
> @@ -126,6 +126,7 @@ struct osm_routing_engine {
>       int (*ucast_build_fwd_tables) (void *context);
>       void (*ucast_dump_tables) (void *context);
>       void (*delete) (void *context);
> +     struct osm_routing_engine *next;
>  };
>  /*
>  * FIELDS
> @@ -148,6 +149,9 @@ struct osm_routing_engine {
>  *    delete
>  *            The delete method, may be used for routing engine
>  *            internals cleanup.
> +*
> +*    next
> +*            Pointer to next routing engine in the list.
>  */
>  
>  /****s* OpenSM: OpenSM/osm_opensm_t
> @@ -178,7 +182,7 @@ typedef struct osm_opensm {
>       osm_log_t log;
>       cl_dispatcher_t disp;
>       cl_plock_t lock;
> -     struct osm_routing_engine routing_engine;
> +     struct osm_routing_engine *routing_engine_list;
>       osm_routing_engine_type_t routing_engine_used;
>       osm_stats_t stats;
>       osm_console_t console;
> @@ -221,8 +225,8 @@ typedef struct osm_opensm {
>  *    lock
>  *            Shared lock guarding most OpenSM structures.
>  *
> -*    routing_engine
> -*            Routing engine; will be initialized then used.
> +*    routing_engine_list
> +*            List of routing engines that should be tried for use.
>  *
>  *    routing_engine_used
>  *            Indicates which routing engine was used to route a subnet.
> diff --git a/opensm/include/opensm/osm_subnet.h b/opensm/include/opensm/osm_subnet.h
> index f90f7ea..0c7f3b9 100644
> --- a/opensm/include/opensm/osm_subnet.h
> +++ b/opensm/include/opensm/osm_subnet.h
> @@ -182,7 +182,7 @@ typedef struct osm_subn_opt {
>       char *port_prof_ignore_file;
>       boolean_t port_profile_switch_nodes;
>       boolean_t sweep_on_trap;
> -     char *routing_engine_name;
> +     char *routing_engine_names;
>       boolean_t connect_roots;
>       char *lid_matrix_dump_file;
>       char *lfts_file;
> @@ -353,9 +353,8 @@ typedef struct osm_subn_opt {
>  *    sweep_on_trap
>  *            Received traps will initiate a new sweep.
>  *
> -*    routing_engine_name
> -*            Name of used routing engine
> -*            (other than default Min Hop Algorithm)
> +*    routing_engine_names
> +*            Name of routing engine(s) to use.
>  *
>  *    connect_roots
>  *            The option which will enforce root to root connectivity with
> diff --git a/opensm/include/opensm/osm_ucast_mgr.h b/opensm/include/opensm/osm_ucast_mgr.h
> index 1dc9a37..59ba9fa 100644
> --- a/opensm/include/opensm/osm_ucast_mgr.h
> +++ b/opensm/include/opensm/osm_ucast_mgr.h
> @@ -264,7 +264,7 @@ osm_ucast_mgr_set_fwd_table(IN osm_ucast_mgr_t * const p_mgr,
>  *
>  * SYNOPSIS
>  */
> -void osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr);
> +int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr);
>  /*
>  * PARAMETERS
>  *    p_mgr
> diff --git a/opensm/man/opensm.8.in b/opensm/man/opensm.8.in
> index 565c5f8..6790d11 100644
> --- a/opensm/man/opensm.8.in
> +++ b/opensm/man/opensm.8.in
> @@ -9,7 +9,7 @@ opensm \- InfiniBand subnet manager and administration (SM/SA)
>  [\-F | \-\-config <file_name>] [\-c(reate-config) <file_name>]
>  [\-g(uid) <GUID in hex>] [\-l(mc) <LMC>]
>  [\-p(riority) <PRIORITY>] [\-smkey <SM_Key>] [\-r(eassign_lids)]
> -[\-R <engine name> | \-\-routing_engine <engine name>]
> +[\-R <engine name(s)> | \-\-routing_engine <engine name(s)>]
>  [\-z | \-\-connect_roots]
>  [\-M <file name> | \-\-lid_matrix_file <file name>]
>  [\-U <file name> | \-\-lfts_file <file name>]
> @@ -116,8 +116,10 @@ Without -r, OpenSM attempts to preserve existing
>  LID assignments resolving multiple use of same LID.
>  .TP
>  \fB\-R\fR, \fB\-\-routing_engine\fR
> -This option chooses routing engine instead of Min Hop
> -algorithm (default).
> +This option chooses routing engine(s) to use instead of Min Hop
> +algorithm (default).  Multiple routing engines can be specified
> +separated by commas so that specific ordering of routing algorithms
> +will be tried if earlier routing engines fail.
>  Supported engines: minhop, updn, file, ftree, lash, dor
>  .TP
>  \fB\-z\fR, \fB\-\-connect_roots\fR
> diff --git a/opensm/opensm/main.c b/opensm/opensm/main.c
> index 01bfddf..2f53157 100644
> --- a/opensm/opensm/main.c
> +++ b/opensm/opensm/main.c
> @@ -177,8 +177,10 @@ static void show_usage(void)
>              "          LID assignments resolving multiple use of same 
> LID.\n\n");
>       printf("-R\n"
>              "--routing_engine <engine name>\n"
> -            "          This option chooses routing engine instead of Min Hop\n"
> -            "          algorithm (default).\n"
> +            "          This option chooses routing engine(s) to use instead of default\n"
> +            "          Min Hop algorithm.  Multiple routing engines can be specified\n"
> +            "          separated by commas so that specific ordering of routing\n"
> +            "          algorithms will be tried if earlier routing engines fail.\n"
>              "          Supported engines: updn, file, ftree, lash, dor\n\n");
>       printf("-z\n"
>              "--connect_roots\n"
> @@ -851,8 +853,8 @@ int main(int argc, char *argv[])
>                       break;
>  
>               case 'R':
> -                     opt.routing_engine_name = optarg;
> -                     printf(" Activate \'%s\' routing engine\n", optarg);
> +                     opt.routing_engine_names = optarg;
> +                     printf(" Activate \'%s\' routing engine(s)\n", optarg);
>                       break;
>  
>               case 'z':
> diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
> index d17fed3..4970d0c 100644
> --- a/opensm/opensm/osm_opensm.c
> +++ b/opensm/opensm/osm_opensm.c
> @@ -61,24 +61,23 @@
>  
>  struct routing_engine_module {
>       const char *name;
> -     int (*setup) (osm_opensm_t * p_osm);
> +     int (*setup) (struct osm_routing_engine *, osm_opensm_t *);
>  };
>  
> -extern int osm_ucast_updn_setup(osm_opensm_t * p_osm);
> -extern int osm_ucast_file_setup(osm_opensm_t * p_osm);
> -extern int osm_ucast_ftree_setup(osm_opensm_t * p_osm);
> -extern int osm_ucast_lash_setup(osm_opensm_t * p_osm);
> -
> -static int osm_ucast_null_setup(osm_opensm_t * p_osm);
> +extern int osm_ucast_minhop_setup(struct osm_routing_engine *, osm_opensm_t *);
> +extern int osm_ucast_updn_setup(struct osm_routing_engine *, osm_opensm_t *);
> +extern int osm_ucast_file_setup(struct osm_routing_engine *, osm_opensm_t *);
> +extern int osm_ucast_ftree_setup(struct osm_routing_engine *, osm_opensm_t *);
> +extern int osm_ucast_lash_setup(struct osm_routing_engine *, osm_opensm_t *);
> +extern int osm_ucast_dor_setup(struct osm_routing_engine *, osm_opensm_t *);
>  
>  const static struct routing_engine_module routing_modules[] = {
> -     {"null", osm_ucast_null_setup},

Not sure how many legacy opensm.opts files are out there, but I kept the
"null" routing engine in there just for safety.  Is it ok to remove?

> -     {"minhop", osm_ucast_null_setup},
> +     {"minhop", osm_ucast_minhop_setup},
>       {"updn", osm_ucast_updn_setup},
>       {"file", osm_ucast_file_setup},
>       {"ftree", osm_ucast_ftree_setup},
>       {"lash", osm_ucast_lash_setup},
> -     {"dor", osm_ucast_null_setup},
> +     {"dor", osm_ucast_dor_setup},
>       {NULL, NULL}
>  };
>  
> @@ -135,33 +134,77 @@ osm_routing_engine_type_t osm_routing_engine_type(IN 
> const char *str)
>  
>  /**********************************************************************
>   **********************************************************************/
> -static int setup_routing_engine(osm_opensm_t * p_osm, const char *name)
> +static void append_routing_engine(osm_opensm_t *osm,
> +                               struct osm_routing_engine *routing_engine)
>  {
> -     const struct routing_engine_module *r;
> +     struct osm_routing_engine *r;
> +
> +     routing_engine->next = NULL;
> +
> +     if (!osm->routing_engine_list) {
> +             osm->routing_engine_list = routing_engine;
> +             return;
> +     }
> +
> +     r = osm->routing_engine_list;
> +     while (r->next)
> +             r = r->next;
>  
> -     for (r = routing_modules; r->name && *r->name; r++) {
> -             if (!strcmp(r->name, name)) {
> -                     p_osm->routing_engine.name = r->name;
> -                     if (r->setup(p_osm)) {
> -                             OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
> +     r->next = routing_engine;
> +}
> +
> +static void setup_routing_engine(osm_opensm_t *osm, const char *name)
> +{
> +     struct osm_routing_engine *re;
> +     const struct routing_engine_module *m;
> +
> +     for (m = routing_modules; m->name && *m->name; m++) {
> +             if (!strcmp(m->name, name)) {
> +                     re = malloc(sizeof(struct osm_routing_engine));
> +                     if (!re) {
> +                             OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
> +                                     "memory allocation failed\n");
> +                             return;
> +                     }
> +                     memset(re, 0, sizeof(struct osm_routing_engine));
> +
> +                     re->name = m->name;
> +                     if (m->setup(re, osm)) {
> +                             OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
>                                       "setup of routing"
>                                       " engine \'%s\' failed\n", name);
> -                             return -2;
> +                             return;
>                       }
> -                     OSM_LOG(&p_osm->log, OSM_LOG_DEBUG,
> -                             "\'%s\' routing engine set up\n",
> -                             p_osm->routing_engine.name);
> -                     return 0;
> +                     OSM_LOG(&osm->log, OSM_LOG_DEBUG,
> +                             "\'%s\' routing engine set up\n", re->name);
> +                     append_routing_engine(osm, re);
> +                     return;
>               }
>       }
> -     return -1;
> +
> +     OSM_LOG(&osm->log, OSM_LOG_ERROR,
> +             "cannot find or setup routing engine \'%s\'", name);
>  }
>  
> -static int osm_ucast_null_setup(osm_opensm_t * p_osm)
> +static void setup_routing_engines(osm_opensm_t *osm, const char 
> *engine_names)
>  {
> -     OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
> -             "nothing yet - using default (minhop) routing engine\n");
> -     return 0;
> +     char *name, *str, *p;
> +
> +     if (!engine_names || !*engine_names) {
> +             setup_routing_engine(osm, "minhop");
> +             return;
> +     }
> +
> +     str = strdup(engine_names);
> +     name = strtok_r(str, ", \t\n", &p);
> +     while (name && *name) {
> +             setup_routing_engine(osm, name);
> +             name = strtok_r(NULL, ", \t\n", &p);
> +     }
> +     free(str);
> +
> +     if (!osm->routing_engine_list)
> +             setup_routing_engine(osm, "minhop");
>  }
>  
>  /**********************************************************************
> @@ -181,6 +224,20 @@ void osm_opensm_construct(IN osm_opensm_t * const p_osm)
>  
>  /**********************************************************************
>   **********************************************************************/
> +static void destroy_routing_engines(osm_opensm_t *osm)
> +{
> +     struct osm_routing_engine *r, *next;
> +
> +     next = osm->routing_engine_list;
> +     while (next) {
> +             r = next;
> +             next = r->next;
> +             if (r->delete)
> +                     r->delete(r->context);
> +             free(r);
> +     }
> +}
> +
>  void osm_opensm_destroy(IN osm_opensm_t * const p_osm)
>  {
>       /* in case of shutdown through exit proc - no ^C */
> @@ -218,8 +275,7 @@ void osm_opensm_destroy(IN osm_opensm_t * const p_osm)
>       osm_sa_db_file_dump(p_osm);
>  
>       /* do the destruction in reverse order as init */
> -     if (p_osm->routing_engine.delete)
> -             p_osm->routing_engine.delete(p_osm->routing_engine.context);
> +     destroy_routing_engines(p_osm);
>       osm_sa_destroy(&p_osm->sa);
>       osm_sm_destroy(&p_osm->sm);
>  #ifdef ENABLE_OSM_PERF_MGR
> @@ -371,12 +427,7 @@ osm_opensm_init(IN osm_opensm_t * const p_osm,
>               goto Exit;
>  #endif                               /* ENABLE_OSM_PERF_MGR */
>  
> -     if (p_opt->routing_engine_name &&
> -         setup_routing_engine(p_osm, p_opt->routing_engine_name))
> -             OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
> -                     "cannot find or setup routing engine"
> -                     " \'%s\'. Default will be used instead\n",
> -                     p_opt->routing_engine_name);
> +     setup_routing_engines(p_osm, p_opt->routing_engine_names);
>  
>       p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;
>  
> diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
> index 278aa3d..a39ce75 100644
> --- a/opensm/opensm/osm_subnet.c
> +++ b/opensm/opensm/osm_subnet.c
> @@ -442,7 +442,7 @@ void osm_subn_set_default_opt(IN osm_subn_opt_t * const 
> p_opt)
>       p_opt->port_prof_ignore_file = NULL;
>       p_opt->port_profile_switch_nodes = FALSE;
>       p_opt->sweep_on_trap = TRUE;
> -     p_opt->routing_engine_name = NULL;
> +     p_opt->routing_engine_names = NULL;
>       p_opt->connect_roots = FALSE;
>       p_opt->lid_matrix_dump_file = NULL;
>       p_opt->lfts_file = NULL;
> @@ -1264,7 +1264,7 @@ int osm_subn_parse_conf_file(char *file_name, 
> osm_subn_opt_t * const p_opts)
>                                   p_key, p_val, &p_opts->sweep_on_trap);
>  
>               opts_unpack_charp("routing_engine",
> -                               p_key, p_val, &p_opts->routing_engine_name);
> +                               p_key, p_val, &p_opts->routing_engine_names);
>  
>               opts_unpack_boolean("connect_roots",
>                                   p_key, p_val, &p_opts->connect_roots);
> @@ -1521,9 +1521,12 @@ int osm_subn_write_conf_file(char *file_name, IN 
> osm_subn_opt_t *const p_opts)
>  
>       fprintf(opts_file,
>               "# Routing engine\n"
> +             "# Multiple routing engines can be specified separated by\n"
> +             "# commas so that specific ordering of routing algorithms 
> will\n"
> +             "# be tried if earlier routing engines fail.\n"
>               "# Supported engines: minhop, updn, file, ftree, lash, dor\n"
> -             "routing_engine %s\n\n", p_opts->routing_engine_name ?
> -             p_opts->routing_engine_name : null_str);
> +             "routing_engine %s\n\n", p_opts->routing_engine_names ?
> +             p_opts->routing_engine_names : null_str);
>  
>       fprintf(opts_file,
>               "# Connect roots (use FALSE if unsure)\n"
> diff --git a/opensm/opensm/osm_ucast_file.c b/opensm/opensm/osm_ucast_file.c
> index 3d00cb2..cbd65c1 100644
> --- a/opensm/opensm/osm_ucast_file.c
> +++ b/opensm/opensm/osm_ucast_file.c
> @@ -135,14 +135,13 @@ static int do_ucast_file_load(void *context)
>               OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
>                       "LFTs file name is not given; "
>                       "using default routing algorithm\n");
> -             return -1;
> +             return 1;
>       }
>  
>       file = fopen(file_name, "r");
>       if (!file) {
>               OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 6302: "
> -                     "cannot open ucast dump file \'%s\'; "
> -                     "using default routing algorithm\n", file_name);
> +                     "cannot open ucast dump file \'%s\': %m\n", file_name);
>               return -1;
>       }
>  
> @@ -270,15 +269,13 @@ static int do_lid_matrix_file_load(void *context)
>               OSM_LOG(&p_osm->log, OSM_LOG_VERBOSE,
>                       "lid matrix file name is not given; "
>                       "using default lid matrix generation algorithm\n");
> -             return -1;
> +             return 1;
>       }
>  
>       file = fopen(file_name, "r");
>       if (!file) {
>               OSM_LOG(&p_osm->log, OSM_LOG_ERROR | OSM_LOG_SYS, "ERR 6305: "
> -                     "cannot open lid matrix file \'%s\'; "
> -                     "using default lid matrix generation algorithm\n",
> -                     file_name);
> +                     "cannot open lid matrix file \'%s\': %m\n", file_name);
>               return -1;
>       }
>  
> @@ -389,10 +386,10 @@ static int do_lid_matrix_file_load(void *context)
>       return 0;
>  }
>  
> -int osm_ucast_file_setup(osm_opensm_t * p_osm)
> +int osm_ucast_file_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
>  {
> -     p_osm->routing_engine.context = (void *)p_osm;
> -     p_osm->routing_engine.build_lid_matrices = do_lid_matrix_file_load;
> -     p_osm->routing_engine.ucast_build_fwd_tables = do_ucast_file_load;
> +     r->context = osm;
> +     r->build_lid_matrices = do_lid_matrix_file_load;
> +     r->ucast_build_fwd_tables = do_ucast_file_load;
>       return 0;
>  }
> diff --git a/opensm/opensm/osm_ucast_ftree.c b/opensm/opensm/osm_ucast_ftree.c
> index 1d3233c..15168b7 100644
> --- a/opensm/opensm/osm_ucast_ftree.c
> +++ b/opensm/opensm/osm_ucast_ftree.c
> @@ -3552,8 +3552,7 @@ static int __osm_ftree_construct_fabric(IN void 
> *context)
>       OSM_LOG(&p_ftree->p_osm->log, OSM_LOG_VERBOSE, "Ranking FatTree\n");
>       if (__osm_ftree_fabric_rank(p_ftree) != 0) {
>               osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> -                     "Failed ranking the tree  - "
> -                     "fat-tree routing falls back to default routing\n");
> +                     "Failed ranking the tree\n");
>               status = -1;
>               goto Exit;
>       }
> @@ -3567,14 +3566,12 @@ static int __osm_ftree_construct_fabric(IN void 
> *context)
>               "Populating CA & switch ports\n");
>       if (__osm_ftree_fabric_populate_ports(p_ftree) != 0) {
>               osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> -                     "Fabric topology is not a fat-tree - "
> -                     "routing falls back to default routing\n");
> +                     "Fabric topology is not a fat-tree\n");
>               status = -1;
>               goto Exit;
>       } else if (p_ftree->cn_num == 0) {
>               osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> -                     "Fabric has no valid compute nodes - "
> -                     "routing falls back to default routing\n");
> +                     "Fabric has no valid compute nodes\n");
>               status = -1;
>               goto Exit;
>       }
> @@ -3586,8 +3583,7 @@ static int __osm_ftree_construct_fabric(IN void 
> *context)
>       if (__osm_ftree_fabric_get_rank(p_ftree) > FAT_TREE_MAX_RANK ||
>           __osm_ftree_fabric_get_rank(p_ftree) < FAT_TREE_MIN_RANK) {
>               osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> -                     "Fabric rank is %u (should be between %u and %u) - "
> -                     "fat-tree routing falls back to default routing\n",
> +                     "Fabric rank is %u (should be between %u and %u)\n",
>                       __osm_ftree_fabric_get_rank(p_ftree), FAT_TREE_MIN_RANK,
>                       FAT_TREE_MAX_RANK);
>               status = -1;
> @@ -3600,8 +3596,7 @@ static int __osm_ftree_construct_fabric(IN void 
> *context)
>          validation - it checks that all the CNs are at the same rank. */
>       if (__osm_ftree_fabric_mark_leaf_switches(p_ftree)) {
>               osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> -                     "Fabric topology is not a fat-tree - "
> -                     "routing falls back to default routing\n");
> +                     "Fabric topology is not a fat-tree\n");
>               status = -1;
>               goto Exit;
>       }
> @@ -3619,8 +3614,7 @@ static int __osm_ftree_construct_fabric(IN void 
> *context)
>          In any case, the first and the last switches in the array are REAL 
> leafs. */
>       if (__osm_ftree_fabric_create_leaf_switch_array(p_ftree)) {
>               osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> -                     "Fabric topology is not a fat-tree - "
> -                     "routing falls back to default routing\n");
> +                     "Fabric topology is not a fat-tree\n");
>               status = -1;
>               goto Exit;
>       }
> @@ -3640,8 +3634,7 @@ static int __osm_ftree_construct_fabric(IN void 
> *context)
>       if (!__osm_ftree_fabric_roots_provided(p_ftree) &&
>           !__osm_ftree_fabric_validate_topology(p_ftree)) {
>               osm_log(&p_ftree->p_osm->log, OSM_LOG_SYS,
> -                     "Fabric topology is not a fat-tree - "
> -                     "routing falls back to default routing\n");
> +                     "Fabric topology is not a fat-tree\n");
>               status = -1;
>               goto Exit;
>       }
> @@ -3726,7 +3719,7 @@ static void __osm_ftree_delete(IN void *context)
>  /***************************************************
>   ***************************************************/
>  
> -int osm_ucast_ftree_setup(osm_opensm_t * p_osm)
> +int osm_ucast_ftree_setup(struct osm_routing_engine *r, osm_opensm_t * p_osm)
>  {
>       ftree_fabric_t *p_ftree = __osm_ftree_fabric_create();
>       if (!p_ftree)
> @@ -3734,12 +3727,10 @@ int osm_ucast_ftree_setup(osm_opensm_t * p_osm)
>  
>       p_ftree->p_osm = p_osm;
>  
> -     p_osm->routing_engine.context = (void *)p_ftree;
> -     p_osm->routing_engine.build_lid_matrices = __osm_ftree_construct_fabric;
> -     p_osm->routing_engine.ucast_build_fwd_tables = __osm_ftree_do_routing;
> -     p_osm->routing_engine.delete = __osm_ftree_delete;
> +     r->context = (void *)p_ftree;
> +     r->build_lid_matrices = __osm_ftree_construct_fabric;
> +     r->ucast_build_fwd_tables = __osm_ftree_do_routing;
> +     r->delete = __osm_ftree_delete;
> +
>       return 0;
>  }
> -
> -/***************************************************
> - ***************************************************/
> diff --git a/opensm/opensm/osm_ucast_lash.c b/opensm/opensm/osm_ucast_lash.c
> index b985e9a..ce3982f 100644
> --- a/opensm/opensm/osm_ucast_lash.c
> +++ b/opensm/opensm/osm_ucast_lash.c
> @@ -785,7 +785,7 @@ static int init_lash_structures(lash_t * p_lash)
>       unsigned vl_min = p_lash->vl_min;
>       unsigned num_switches = p_lash->num_switches;
>       osm_log_t *p_log = &p_lash->p_osm->log;
> -     int status = IB_SUCCESS;
> +     int status = 0;
>       unsigned int i, j, k;
>  
>       OSM_LOG_ENTER(p_log);
> @@ -852,7 +852,7 @@ static int init_lash_structures(lash_t * p_lash)
>       goto Exit;
>  
>  Exit_Mem_Error:
> -     status = IB_ERROR;
> +     status = -1;
>       OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D01: "
>               "Could not allocate required memory for LASH errno %d, errno %d 
> for lack of memory\n",
>               errno, ENOMEM);
> @@ -875,7 +875,7 @@ static int lash_core(lash_t * p_lash)
>       int stop = 0, output_link, i_next_switch;
>       int output_link2, i_next_switch2;
>       int cycle_found2 = 0;
> -     int status = IB_SUCCESS;
> +     int status = 0;
>       int *switch_bitmap = NULL;      /* Bitmap to check if we have processed 
> this pair */
>  
>       OSM_LOG_ENTER(p_log);
> @@ -1028,7 +1028,7 @@ static int lash_core(lash_t * p_lash)
>       goto Exit;
>  
>  Error_Not_Enough_Lanes:
> -     status = IB_ERROR;
> +     status = -1;
>       OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 4D02: "
>               "Lane requirements (%d) exceed available lanes (%d)\n",
>               p_lash->vl_min, lanes_needed);
> @@ -1360,15 +1360,15 @@ uint8_t osm_get_lash_sl(osm_opensm_t * p_osm, 
> osm_port_t * p_src_port,
>       return (uint8_t) ((switch_t *) p_sw->priv)->routing_table[dst_id].lane;
>  }
>  
> -int osm_ucast_lash_setup(osm_opensm_t * p_osm)
> +int osm_ucast_lash_setup(struct osm_routing_engine *r, osm_opensm_t *p_osm)
>  {
>       lash_t *p_lash = lash_create(p_osm);
>       if (!p_lash)
>               return -1;
>  
> -     p_osm->routing_engine.context = p_lash;
> -     p_osm->routing_engine.ucast_build_fwd_tables = lash_process;
> -     p_osm->routing_engine.delete = lash_delete;
> +     r->context = p_lash;
> +     r->ucast_build_fwd_tables = lash_process;
> +     r->delete = lash_delete;
>  
>       return 0;
>  }
> diff --git a/opensm/opensm/osm_ucast_mgr.c b/opensm/opensm/osm_ucast_mgr.c
> index 9d0ad13..935846c 100644
> --- a/opensm/opensm/osm_ucast_mgr.c
> +++ b/opensm/opensm/osm_ucast_mgr.c
> @@ -216,7 +216,6 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const 
> p_mgr,
>       uint8_t port;
>       boolean_t is_ignored_by_port_prof;
>       ib_net64_t node_guid;
> -     struct osm_routing_engine *p_routing_eng;
>       unsigned start_from = 1;
>  
>       OSM_LOG_ENTER(p_mgr->p_log);
> @@ -253,8 +252,6 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const 
> p_mgr,
>  
>       node_guid = osm_node_get_node_guid(p_sw->p_node);
>  
> -     p_routing_eng = &p_mgr->p_subn->p_osm->routing_engine;
> -
>       /*
>          The lid matrix contains the number of hops to each
>          lid from each port.  From this information we determine
> @@ -269,18 +266,9 @@ __osm_ucast_mgr_process_port(IN osm_ucast_mgr_t * const 
> p_mgr,
>               /* do not try to overwrite the ppro of non existing port ... */
>               is_ignored_by_port_prof = TRUE;
>  
> -             /* Up/Down routing can cause unreachable routes between some
> -                switches so we do not report that as an error in that case */
> -             if (!p_routing_eng->build_lid_matrices) {
> -                     OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A08: "
> -                             "No path to get to LID %u from switch 0x%"
> -                             PRIx64 "\n", lid_ho, cl_ntoh64(node_guid));
> -                     /* trigger a new sweep - try again ... */
> -                     p_mgr->p_subn->subnet_initialization_error = TRUE;
> -             } else
> -                     OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
> -                             "No path to get to LID %u from switch 0x%"
> -                             PRIx64 "\n", lid_ho, cl_ntoh64(node_guid));
> +             OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
> +                     "No path to get to LID %u from switch 0x%" PRIx64 "\n",
> +                     lid_ho, cl_ntoh64(node_guid));
>       } else {
>               osm_physp_t *p = osm_node_get_physp_ptr(p_sw->p_node, port);
>  
> @@ -583,7 +571,7 @@ __osm_ucast_mgr_process_neighbors(IN cl_map_item_t * 
> const p_map_item,
>  
>  /**********************************************************************
>   **********************************************************************/
> -void osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
> +int osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t * const p_mgr)
>  {
>       uint32_t i;
>       uint32_t iteration_max;
> @@ -646,6 +634,8 @@ void osm_ucast_mgr_build_lid_matrices(IN osm_ucast_mgr_t 
> * const p_mgr)
>               OSM_LOG(p_mgr->p_log, OSM_LOG_DEBUG,
>                       "Min-hop propagated in %d steps\n", i);
>       }
> +
> +     return 0;
>  }
>  
>  /**********************************************************************
> @@ -752,7 +742,7 @@ static void clear_prof_ignore_flag(cl_map_item_t * const 
> p_map_item, void *ctx)
>       }
>  }
>  
> -static void ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
> +static int ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
>  {
>       cl_qlist_init(&p_mgr->port_order_list);
>  
> @@ -786,27 +776,56 @@ static void ucast_mgr_build_lfts(osm_ucast_mgr_t *p_mgr)
>                          __osm_ucast_mgr_process_tbl, p_mgr);
>  
>       cl_qlist_remove_all(&p_mgr->port_order_list);
> +
> +     return 0;
>  }
>  
>  /**********************************************************************
>   **********************************************************************/
> +static int ucast_mgr_route(struct osm_routing_engine *r, osm_opensm_t *osm)
> +{
> +     int ret;
> +
> +     OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
> +             "building routing with \'%s\' routing algorithm...\n", r->name);
> +
> +     if (!r->build_lid_matrices ||
> +         (ret = r->build_lid_matrices(r->context)) > 0)
> +             ret = osm_ucast_mgr_build_lid_matrices(&osm->sm.ucast_mgr);
> +
> +     if (ret < 0) {
> +             OSM_LOG(&osm->log, OSM_LOG_ERROR,
> +                     "%s: cannot build lid matrices.\n", r->name);
> +             return ret;
> +     }
> +
> +     if (!r->ucast_build_fwd_tables ||
> +         (ret = r->ucast_build_fwd_tables(r->context)) > 0)
> +             ret = ucast_mgr_build_lfts(&osm->sm.ucast_mgr);
> +
> +     if (ret < 0) {
> +             OSM_LOG(&osm->log, OSM_LOG_ERROR,
> +                     "%s: cannot build fwd tables.\n", r->name);
> +             return ret;
> +     }
> +
> +     osm->routing_engine_used = osm_routing_engine_type(r->name);
> +
> +     return 0;
> +}
> +
>  osm_signal_t osm_ucast_mgr_process(IN osm_ucast_mgr_t * const p_mgr)
>  {
>       osm_opensm_t *p_osm;
>       struct osm_routing_engine *p_routing_eng;
>       osm_signal_t signal = OSM_SIGNAL_DONE;
>       cl_qmap_t *p_sw_guid_tbl;
> -     int blm = 0;
> -     int ubft = 0;
>  
>       OSM_LOG_ENTER(p_mgr->p_log);
>  
>       p_sw_guid_tbl = &p_mgr->p_subn->sw_guid_tbl;
>       p_osm = p_mgr->p_subn->p_osm;
> -     p_routing_eng = &p_osm->routing_engine;
> -
> -     p_mgr->is_dor = p_routing_eng->name
> -         && (strcmp(p_routing_eng->name, "dor") == 0);
> +     p_routing_eng = p_osm->routing_engine_list;
>  
>       CL_PLOCK_EXCL_ACQUIRE(p_mgr->p_lock);
>  
> @@ -819,28 +838,19 @@ osm_signal_t osm_ucast_mgr_process(IN osm_ucast_mgr_t * 
> const p_mgr)
>  
>       p_mgr->any_change = FALSE;
>  
> -     if (!p_routing_eng->build_lid_matrices ||
> -         (blm = p_routing_eng->build_lid_matrices(p_routing_eng->context)))
> -             osm_ucast_mgr_build_lid_matrices(p_mgr);
> +     p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;
> +     while (p_routing_eng) {
> +             if (!ucast_mgr_route(p_routing_eng, p_osm))
> +                     break;
> +             p_routing_eng = p_routing_eng->next;
> +     }
>  
> -     /*
> -        Now that the lid matrices have been built, we can
> -        build and download the switch forwarding tables.
> -      */
> -     if (!p_routing_eng->ucast_build_fwd_tables ||
> -         (ubft =
> -          p_routing_eng->ucast_build_fwd_tables(p_routing_eng->context)))
> +     if (p_osm->routing_engine_used == OSM_ROUTING_ENGINE_TYPE_NONE) {
> +             /* If configured routing algorithm failed, use default MinHop */
> +             osm_ucast_mgr_build_lid_matrices(p_mgr);
>               ucast_mgr_build_lfts(p_mgr);
> -
> -     /* 'file' routing engine has one unique logic corner case */
> -     if (p_routing_eng->name && (strcmp(p_routing_eng->name, "file") == 0)
> -         && (!blm || !ubft))
> -             p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_FILE;
> -     else if (!blm && !ubft)
> -             p_osm->routing_engine_used =
> -                 osm_routing_engine_type(p_routing_eng->name);
> -     else
>               p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_MINHOP;
> +     }
>  
>       OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
>               "%s tables configured on all switches\n",
> @@ -861,3 +871,28 @@ Exit:
>       OSM_LOG_EXIT(p_mgr->p_log);
>       return (signal);
>  }
> +
> +static int ucast_build_lid_matrices(void *context)
> +{
> +     return osm_ucast_mgr_build_lid_matrices(context);
> +}
> +
> +static int ucast_build_lfts(void *context)
> +{
> +     return ucast_mgr_build_lfts(context);
> +}
> +
> +int osm_ucast_minhop_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
> +{
> +     r->context = &osm->sm.ucast_mgr;
> +     r->build_lid_matrices = ucast_build_lid_matrices;
> +     r->ucast_build_fwd_tables = ucast_build_lfts;
> +     return 0;
> +}
> +
> +int osm_ucast_dor_setup(struct osm_routing_engine *r, osm_opensm_t *osm)
> +{
> +     osm_ucast_minhop_setup(r, osm);
> +     osm->sm.ucast_mgr.is_dor = 1;

If dor is listed in the routing chain, all other algorithms that may
fall through into minhop's build_lfts callback (minhop, updn, file)
will be affected by the is_dor flag.  Is this intended?

If we don't want to abstract it for this round, perhaps we could move
the setting and clearing of the "is_dor" flag into ucast_mgr_route(),
so that is_dor is set only while dor is being routed.

> +     return 0;
> +}
> diff --git a/opensm/opensm/osm_ucast_updn.c b/opensm/opensm/osm_ucast_updn.c
> index 90e9af8..4fdcc78 100644
> --- a/opensm/opensm/osm_ucast_updn.c
> +++ b/opensm/opensm/osm_ucast_updn.c
> @@ -643,7 +643,7 @@ static int __osm_updn_call(void *ctx)
>       } else {
>               OSM_LOG(&p_updn->p_osm->log, OSM_LOG_INFO,
>                       "disabling UPDN algorithm, no root nodes were found\n");
> -             ret = 1;
> +             ret = -1;
>       }
>  
>       if (osm_log_is_active(&p_updn->p_osm->log, OSM_LOG_ROUTING))
> @@ -669,7 +669,7 @@ static void __osm_updn_delete(void *context)
>       free(context);
>  }
>  
> -int osm_ucast_updn_setup(osm_opensm_t * p_osm)
> +int osm_ucast_updn_setup(struct osm_routing_engine *r, osm_opensm_t *p_osm)
>  {
>       updn_t *p_updn;
>  
> @@ -680,9 +680,9 @@ int osm_ucast_updn_setup(osm_opensm_t * p_osm)
>  
>       p_updn->p_osm = p_osm;
>  
> -     p_osm->routing_engine.context = p_updn;
> -     p_osm->routing_engine.delete = __osm_updn_delete;
> -     p_osm->routing_engine.build_lid_matrices = __osm_updn_call;
> +     r->context = p_updn;
> +     r->delete = __osm_updn_delete;
> +     r->build_lid_matrices = __osm_updn_call;
>  
>       return 0;
>  }

The patch looks fine as a whole.

Thanks,
Al

-- 
Albert Chu
[EMAIL PROTECTED]
Computer Scientist
High Performance Systems Division
Lawrence Livermore National Laboratory

_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to