Slava Strebkov wrote:
> HI!
> That was my misunderstanding - upon heavy sweep SM will try to load
> routing engines as defined in the SM.conf file (ftree & updn - in that
> order).
> So ftree will be loaded when switch comes back from reboot.
> 
> Slava
> 
> -----Original Message-----
> From: linux-rdma-ow...@vger.kernel.org
> [mailto:linux-rdma-ow...@vger.kernel.org] On Behalf Of Yevgeny Kliteynik
> Sent: Monday, December 07, 2009 10:22 AM
> To: Slava Strebkov
> Cc: linux-rdma@vger.kernel.org
> Subject: Re: [PATCH v3] opensm: support routing engine update
> 
> Slava,
> 
> Slava Strebkov wrote:
>> Hi Yevgeny,
>> In that case SM will use updn and will not come back to ftree
>> automatically.
> 
> I think that this is a bad thing.
> 
> I wouldn't want *temporary* change of fabric to cause
> *permanent* change of SM mode of operation. Such changes
> do happen, and I'd prefer SM to continue functioning
> in accordance with the user's configuration once the
> fabric is settled again.
> 
> I do see the cases where the change that you propose is
> beneficial - if fabric topology doesn't fit the chosen
> routing, SM will waste time on retrying the wrong routing
> at every heavy sweep, but this happens due to suboptimal
> SM configuration and not as a result of some event that
> user has no control of.

For every heavy sweep the SM will try to configure the routing engines as 
specified in the conf file.
So when a switch goes up and ftree configuration is valid, the SM will 
configure ftree instead of updn.

Eli

> 
> -- Yevgeny
> 
> 
>  
>> Slava
>>
>> -----Original Message-----
>> From: linux-rdma-ow...@vger.kernel.org
>> [mailto:linux-rdma-ow...@vger.kernel.org] On Behalf Of Yevgeny
> Kliteynik
>> Sent: Sunday, December 06, 2009 6:03 PM
>> To: Slava Strebkov
>> Cc: linux-rdma@vger.kernel.org
>> Subject: Re: [PATCH v3] opensm: support routing engine update
>>
>> Slava,
>>
>> Slava Strebkov wrote:
>>> setup routing engine when in use and delete when failed.
>>> setup routing engine before use.
>>> delete resources when routing algorithm fails.
>>> this will save allocation for routing algorithms that are not used.
>> Suppose a user runs SM with ftree & updn routings (in that order),
>> and SM manages to route the fabric with ftree. At some point 
>> some switch reboots and causes ftree to fail and SM routes the
>> fabric with updn.
>> Does this mean that ftree will be removed from the list, and
>> when the switch comes back, SM won't try ftree any more?
>>
>> -- Yevgeny
>>  
>>> Signed-off-by: Slava Strebkov <sla...@voltaire.com>
>>> ---
>>>  opensm/include/opensm/osm_opensm.h |    5 +++
>>>  opensm/opensm/osm_opensm.c         |   57
>> +++++++++++++++++++++++++++++++-----
>>>  opensm/opensm/osm_subnet.c         |    7 ++++-
>>>  opensm/opensm/osm_ucast_mgr.c      |   28 +++++++++++++++++
>>>  4 files changed, 88 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/opensm/include/opensm/osm_opensm.h
>> b/opensm/include/opensm/osm_opensm.h
>>> index c121be4..ca0fddb 100644
>>> --- a/opensm/include/opensm/osm_opensm.h
>>> +++ b/opensm/include/opensm/osm_opensm.h
>>> @@ -109,6 +109,7 @@ typedef enum _osm_routing_engine_type {
>>>  } osm_routing_engine_type_t;
>>>  /***********/
>>>  
>>> +struct osm_opensm;
>>>  /****s* OpenSM: OpenSM/osm_routing_engine
>>>  * NAME
>>>  *  struct osm_routing_engine
>>> @@ -122,6 +123,8 @@ typedef enum _osm_routing_engine_type {
>>>  struct osm_routing_engine {
>>>     const char *name;
>>>     void *context;
>>> +   int initialized;
>>> +   int (*setup) (struct osm_routing_engine *re, struct osm_opensm
>> *p_osm);
>>>     int (*build_lid_matrices) (void *context);
>>>     int (*ucast_build_fwd_tables) (void *context);
>>>     void (*ucast_dump_tables) (void *context);
>>> @@ -183,6 +186,7 @@ typedef struct osm_opensm {
>>>     cl_dispatcher_t disp;
>>>     cl_plock_t lock;
>>>     struct osm_routing_engine *routing_engine_list;
>>> +   struct osm_routing_engine *last_routing_engine;
>>>     osm_routing_engine_type_t routing_engine_used;
>>>     osm_stats_t stats;
>>>     osm_console_t console;
>>> @@ -522,6 +526,7 @@ extern volatile unsigned int osm_exit_flag;
>>>  * DESCRIPTION
>>>  *  Set to one to cause all threads to leave
>>>  *********/
>>> +void osm_update_routing_engines(osm_opensm_t *osm, const char
>> *engine_names);
>>>  
>>>  END_C_DECLS
>>>  #endif                             /* _OSM_OPENSM_H_ */
>>> diff --git a/opensm/opensm/osm_opensm.c b/opensm/opensm/osm_opensm.c
>>> index 50d1349..f90584d 100644
>>> --- a/opensm/opensm/osm_opensm.c
>>> +++ b/opensm/opensm/osm_opensm.c
>>> @@ -169,14 +169,7 @@ static void setup_routing_engine(osm_opensm_t
>> *osm, const char *name)
>>>                     memset(re, 0, sizeof(struct
>> osm_routing_engine));
>>>  
>>>                     re->name = m->name;
>>> -                   if (m->setup(re, osm)) {
>>> -                           OSM_LOG(&osm->log, OSM_LOG_VERBOSE,
>>> -                                   "setup of routing"
>>> -                                   " engine \'%s\' failed\n",
>> name);
>>> -                           return;
>>> -                   }
>>> -                   OSM_LOG(&osm->log, OSM_LOG_DEBUG,
>>> -                           "\'%s\' routing engine set up\n",
>> re->name);
>>> +                   re->setup = m->setup;
>>>                     append_routing_engine(osm, re);
>>>                     return;
>>>             }
>>> @@ -236,6 +229,54 @@ static void destroy_routing_engines(osm_opensm_t
>> *osm)
>>>                     r->delete(r->context);
>>>             free(r);
>>>     }
>>> +    osm->routing_engine_list = NULL;
>>> +}
>>> +
>>> +static void update_routing_engine(
>>> +       struct osm_routing_engine *cur,
>>> +       struct osm_routing_engine *last)
>>> +{
>>> +   struct osm_routing_engine *next = cur->next;
>>> +   if (!last)
>>> +           return; /* no last routing engine */
>>> +   memcpy(cur, last, sizeof(*cur));
>>> +   /* restore next */
>>> +   cur->next = next;
>>> +}
>>> +
>>> +void osm_update_routing_engines(osm_opensm_t *osm, const char
>> *engine_names)
>>> +{
>>> +   struct osm_routing_engine *r, *l;
>>> +   /* find used routing engine and save as last */
>>> +   l = r = osm->routing_engine_list;
>>> +   if (r && osm->routing_engine_used ==
>> osm_routing_engine_type(r->name)) {
>>> +           osm->last_routing_engine = r;
>>> +           osm->routing_engine_list = r->next;
>>> +   }
>>> +   else while ((r = r->next)) {
>>> +           if (osm->routing_engine_used ==
>>> +                   osm_routing_engine_type(r->name)) {
>>> +                           osm->last_routing_engine = r;
>>> +                           l->next = r->next;
>>> +                           break;
>>> +           }
>>> +           l = r;
>>> +   }
>>> +   /* cleanup prev routing engine list and replace with current
>> list */
>>> +   destroy_routing_engines(osm);
>>> +   setup_routing_engines(osm, engine_names);
>>> +   /* check if last routing engine exist in new list and update
>> callbacks */
>>> +   r = osm->routing_engine_list;
>>> +   while (r) {
>>> +           if (osm->routing_engine_used ==
>>> +                   osm_routing_engine_type(r->name)) {
>>> +                           update_routing_engine(r,
>> osm->last_routing_engine);
>>> +                           free(osm->last_routing_engine);
>>> +                           osm->last_routing_engine = NULL;
>>> +                           break;
>>> +           }
>>> +   r = r->next;
>>> +   }
>>>  }
>>>  
>>>
> /**********************************************************************
>>> diff --git a/opensm/opensm/osm_subnet.c b/opensm/opensm/osm_subnet.c
>>> index 8d63a75..742ae64 100644
>>> --- a/opensm/opensm/osm_subnet.c
>>> +++ b/opensm/opensm/osm_subnet.c
>>> @@ -152,6 +152,11 @@ static void opts_setup_sm_priority(osm_subn_t
>> *p_subn, void *p_val)
>>>     osm_set_sm_priority(p_sm, sm_priority);
>>>  }
>>>  
>>> +static void opts_setup_routing_engine(osm_subn_t *p_subn, void
>> *p_val)
>>> +{
>>> +   osm_update_routing_engines(p_subn->p_osm, p_val);
>>> +}
>>> +
>>>  static void opts_parse_net64(IN osm_subn_t *p_subn, IN char *p_key,
>>>                          IN char *p_val_str, void *p_v1, void *p_v2,
>>>                          void (*pfn)(osm_subn_t *, void *))
>>> @@ -324,7 +329,7 @@ static const opt_rec_t opt_tbl[] = {
>>>     { "hop_weights_file", OPT_OFFSET(hop_weights_file),
>> opts_parse_charp, NULL, 0 },
>>>     { "port_profile_switch_nodes",
>> OPT_OFFSET(port_profile_switch_nodes), opts_parse_boolean, NULL, 1 },
>>>     { "sweep_on_trap", OPT_OFFSET(sweep_on_trap),
>> opts_parse_boolean, NULL, 1 },
>>> -   { "routing_engine", OPT_OFFSET(routing_engine_names),
>> opts_parse_charp, NULL, 0 },
>>> +   { "routing_engine", OPT_OFFSET(routing_engine_names),
>> opts_parse_charp, opts_setup_routing_engine, 1 },
>>>     { "connect_roots", OPT_OFFSET(connect_roots),
>> opts_parse_boolean, NULL, 1 },
>>>     { "use_ucast_cache", OPT_OFFSET(use_ucast_cache),
>> opts_parse_boolean, NULL, 1 },
>>>     { "log_file", OPT_OFFSET(log_file), opts_parse_charp, NULL, 0 },
>>> diff --git a/opensm/opensm/osm_ucast_mgr.c
>> b/opensm/opensm/osm_ucast_mgr.c
>>> index 39d825c..d6294ac 100644
>>> --- a/opensm/opensm/osm_ucast_mgr.c
>>> +++ b/opensm/opensm/osm_ucast_mgr.c
>>> @@ -998,8 +998,23 @@ int osm_ucast_mgr_process(IN osm_ucast_mgr_t *
>> p_mgr)
>>>  
>>>     p_osm->routing_engine_used = OSM_ROUTING_ENGINE_TYPE_NONE;
>>>     while (p_routing_eng) {
>>> +           if (!p_routing_eng->initialized &&
>>> +                   p_routing_eng->setup(p_routing_eng, p_osm)) {
>>> +                   OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR,
>>> +                           "ERR 3A0F: setup of routing engine
>> \'%s\' failed\n",
>>> +                                   p_routing_eng->name);
>>> +                                   p_routing_eng =
>> p_routing_eng->next;
>>> +                                   continue;
>>> +           }
>>> +           OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
>>> +                   "\'%s\' routing engine set up\n",
>> p_routing_eng->name);
>>> +           p_routing_eng->initialized = 1;
>>>             if (!ucast_mgr_route(p_routing_eng, p_osm))
>>>                     break;
>>> +           /* delete unused routing engine */
>>> +           if (p_routing_eng->delete)
>>> +                   p_routing_eng->delete(p_routing_eng->context);
>>> +           p_routing_eng->initialized = 0;
>>>             p_routing_eng = p_routing_eng->next;
>>>     }
>>>  
>>> @@ -1011,6 +1026,19 @@ int osm_ucast_mgr_process(IN osm_ucast_mgr_t *
>> p_mgr)
>>>             p_osm->routing_engine_used =
>> OSM_ROUTING_ENGINE_TYPE_MINHOP;
>>>     }
>>>  
>>> +   /* if for some reason different routing engine is used */
>>> +   /* cleanup last unused routing engine */
>>> +   p_routing_eng = p_osm->last_routing_engine;
>>> +   if (p_routing_eng) {
>>> +                   if (p_routing_eng->initialized &&
>>> +                                   p_routing_eng->delete &&
>>> +                                   p_osm->routing_engine_used !=
>>> +
>> osm_routing_engine_type(p_routing_eng->name))
>>> +
>> p_routing_eng->delete(p_routing_eng->context);
>>> +                   free(p_routing_eng);
>>> +                   p_osm->last_routing_engine = NULL;
>>> +   }
>>> +
>>>     OSM_LOG(p_mgr->p_log, OSM_LOG_INFO,
>>>             "%s tables configured on all switches\n",
>>>
>> osm_routing_engine_type_str(p_osm->routing_engine_used));
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-rdma"
> in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>>
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to