Hi Yevgeny,

Sorry about the huge delay in looking at this.

On 12:59 Sun 04 May     , Yevgeny Kliteynik wrote:
> Unicast routing cache implementation.
> 
> Unicast routing cache comprises the following:
>  - Topology: a data structure with all the switches and CAs of the fabric
>  - LFTs: each switch has an LFT cached
>  - Lid matrices: each switch has lid matrices cached, which is needed for
>    multicast routing (which is not cached).
> 
> There is also a topology matching function that compares the current topology
> with the cached one to find out whether the cache is usable (valid) or not.

As I wrote in another email, I believe that saving a needless full-rerouting
cycle is a good idea and very much needed for OpenSM, but I don't like this
implementation.

In order to understand it better I needed to go over the code; some
comments may be useful. They are below.

Sasha

> Signed-off-by: Yevgeny Kliteynik <[EMAIL PROTECTED]>
> ---
>  opensm/include/opensm/osm_ucast_cache.h |  319 ++++++++
>  opensm/opensm/osm_ucast_cache.c         | 1197 
> +++++++++++++++++++++++++++++++
>  2 files changed, 1516 insertions(+), 0 deletions(-)
>  create mode 100644 opensm/include/opensm/osm_ucast_cache.h
>  create mode 100644 opensm/opensm/osm_ucast_cache.c
> 
> diff --git a/opensm/include/opensm/osm_ucast_cache.h 
> b/opensm/include/opensm/osm_ucast_cache.h
> new file mode 100644
> index 0000000..a3b40f9
> --- /dev/null
> +++ b/opensm/include/opensm/osm_ucast_cache.h
> @@ -0,0 +1,319 @@
> +/*
> + * Copyright (c) 2002-2008 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
> + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + */
> +
> +/*
> + * Abstract:
> + *   Declaration of osm_ucast_cache_t.
> + *   This object represents the Unicast Cache object.
> + *
> + * Environment:
> + *   Linux User Mode
> + *
> + * $Revision: 1.4 $
> + */
> +
> +#ifndef _OSM_UCAST_CACHE_H_
> +#define _OSM_UCAST_CACHE_H_
> +
> +#ifdef __cplusplus
> +#  define BEGIN_C_DECLS extern "C" {
> +#  define END_C_DECLS   }
> +#else                                /* !__cplusplus */
> +#  define BEGIN_C_DECLS
> +#  define END_C_DECLS
> +#endif                               /* __cplusplus */
> +
> +BEGIN_C_DECLS
> +
> +struct _osm_ucast_mgr;
> +
> +#define UCAST_CACHE_TOPOLOGY_MATCH                   0x0000
> +#define UCAST_CACHE_TOPOLOGY_LESS_SWITCHES           0x0001
> +#define UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING 0x0002
> +#define UCAST_CACHE_TOPOLOGY_LINK_TO_CA_MISSING      0x0004
> +#define UCAST_CACHE_TOPOLOGY_MORE_SWITCHES           0x0008
> +#define UCAST_CACHE_TOPOLOGY_NEW_LID                 0x0010
> +#define UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING      0x0020
> +#define UCAST_CACHE_TOPOLOGY_LINK_ADDED              0x0040
> +#define UCAST_CACHE_TOPOLOGY_NEW_SWITCH              0x0080
> +#define UCAST_CACHE_TOPOLOGY_NEW_CA                  0x0100
> +#define UCAST_CACHE_TOPOLOGY_NO_MATCH                0x0200
> +
> +/****h* OpenSM/Unicast Manager/Unicast Cache
> +* NAME
> +*    Unicast Cache
> +*
> +* DESCRIPTION
> +*    The Unicast Cache object encapsulates the information
> +*    needed to cache and write unicast routing of the subnet.
> +*
> +*    The Unicast Cache object is NOT thread safe.
> +*
> +*    This object should be treated as opaque and should be
> +*    manipulated only through the provided functions.
> +*
> +* AUTHOR
> +*    Yevgeny Kliteynik, Mellanox
> +*
> +*********/
> +
> +
> +/****s* OpenSM: Unicast Cache/osm_ucast_cache_t
> +* NAME
> +*    osm_ucast_cache_t
> +*
> +* DESCRIPTION
> +*    Unicast Cache structure.
> +*
> +*    This object should be treated as opaque and should
> +*    be manipulated only through the provided functions.
> +*
> +* SYNOPSIS
> +*/
> +typedef struct osm_ucast_cache_t_ {
> +     struct _osm_ucast_mgr * p_ucast_mgr;
> +     cl_qmap_t sw_tbl;
> +     cl_qmap_t ca_tbl;
> +     boolean_t topology_valid;
> +     boolean_t routing_valid;
> +     boolean_t need_update;
> +} osm_ucast_cache_t;
> +/*
> +* FIELDS
> +*    p_ucast_mgr
> +*            Pointer to the Unicast Manager for this subnet.
> +*
> +*    sw_tbl
> +*            Cached switches table.
> +*
> +*    ca_tbl
> +*            Cached CAs table.
> +*
> +*    topology_valid
> +*            TRUE if the cache is populated with the fabric topology.
> +*
> +*    routing_valid
> +*            TRUE if the cache is populated with the unicast routing
> +*            in addition to the topology.
> +*
> +*    need_update
> +*            TRUE if the cached routing needs to be updated.
> +*
> +* SEE ALSO
> +*    Unicast Manager object
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_construct
> +* NAME
> +*    osm_ucast_cache_construct
> +*
> +* DESCRIPTION
> +*    This function constructs a Unicast Cache object.
> +*
> +* SYNOPSIS
> +*/
> +osm_ucast_cache_t *
> +osm_ucast_cache_construct(struct _osm_ucast_mgr * const p_mgr);
> +/*
> +* PARAMETERS
> +*    p_mgr
> +*            [in] Pointer to a Unicast Manager object.
> +*
> +* RETURN VALUE
> +*    This function return the created Ucast Cache object on success,
> +*    or NULL on any error.
> +*
> +* NOTES
> +*    Allows osm_ucast_cache_destroy
> +*
> +*    Calling osm_ucast_mgr_construct is a prerequisite to
> +*    calling any other method.
> +*
> +* SEE ALSO
> +*    Unicast Cache object, osm_ucast_cache_destroy
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_destroy
> +* NAME
> +*    osm_ucast_cache_destroy
> +*
> +* DESCRIPTION
> +*    The osm_ucast_cache_destroy function destroys the object,
> +*    releasing all resources.
> +*
> +* SYNOPSIS
> +*/
> +void osm_ucast_cache_destroy(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +*    p_cache
> +*            [in] Pointer to the object to destroy.
> +*
> +* RETURN VALUE
> +*    This function does not return any value.
> +*
> +* NOTES
> +*    Performs any necessary cleanup of the specified
> +*    Unicast Cache object.
> +*    Further operations should not be attempted on the
> +*    destroyed object.
> +*    This function should only be called after a call to
> +*    osm_ucast_cache_construct.
> +*
> +* SEE ALSO
> +*    Unicast Cache object, osm_ucast_cache_construct
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_refresh_topo
> +* NAME
> +*    osm_ucast_cache_refresh_topo
> +*
> +* DESCRIPTION
> +*    The osm_ucast_cache_refresh_topo function re-reads the
> +*    updated topology.
> +*
> +* SYNOPSIS
> +*/
> +void osm_ucast_cache_refresh_topo(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +*    p_cache
> +*            [in] Pointer to the cache object to refresh.
> +*
> +* RETURN VALUE
> +*    This function does not return any value.
> +*
> +* NOTES
> +*    This function invalidates the existing unicast cache
> +*    and re-reads the updated topology.
> +*
> +* SEE ALSO
> +*    Unicast Cache object, osm_ucast_cache_construct
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_refresh_lid_matrices
> +* NAME
> +*    osm_ucast_cache_refresh_lid_matrices
> +*
> +* DESCRIPTION
> +*    The osm_ucast_cache_refresh_topo function re-reads the
> +*    updated lid matrices.
> +*
> +* SYNOPSIS
> +*/
> +void osm_ucast_cache_refresh_lid_matrices(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +*    p_cache
> +*            [in] Pointer to the cache object to refresh.
> +*
> +* RETURN VALUE
> +*    This function does not return any value.
> +*
> +* NOTES
> +*    This function re-reads the updated lid matrices.
> +*
> +* SEE ALSO
> +*    Unicast Cache object, osm_ucast_cache_construct
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_apply
> +* NAME
> +*    osm_ucast_cache_apply
> +*
> +* DESCRIPTION
> +*    The osm_ucast_cache_apply function tries to apply
> +*    the cached unicast routing on the subnet switches.
> +*
> +* SYNOPSIS
> +*/
> +int osm_ucast_cache_apply(osm_ucast_cache_t * p_cache);
> +/*
> +* PARAMETERS
> +*    p_cache
> +*            [in] Pointer to the cache object to be used.
> +*
> +* RETURN VALUE
> +*    0 if unicast cache was successfully written to switches,
> +*    non-zero for any error.
> +*
> +* NOTES
> +*    Compares the current topology to the cached topology,
> +*    and if the topology matches, or if changes in topology
> +*    have no impact on routing tables, writes the cached
> +*    unicast routing to the subnet switches.
> +*
> +* SEE ALSO
> +*    Unicast Cache object
> +*********/
> +
> +/****f* OpenSM: Unicast Cache/osm_ucast_cache_set_sw_fwd_table
> +* NAME
> +*    osm_ucast_cache_set_sw_fwd_table
> +*
> +* DESCRIPTION
> +*    The osm_ucast_cache_set_sw_fwd_table function sets
> +*    (caches) linear forwarding table for the specified
> +*    switch.
> +*
> +* SYNOPSIS
> +*/
> +void
> +osm_ucast_cache_set_sw_fwd_table(osm_ucast_cache_t * p_cache,
> +                              uint8_t * ucast_mgr_lft_buf,
> +                              osm_switch_t * p_osm_sw);
> +/*
> +* PARAMETERS
> +*    p_cache
> +*            [in] Pointer to the cache object to be used.
> +*
> +*    ucast_mgr_lft_buf
> +*            [in] LFT to set.
> +*
> +*    p_osm_sw
> +*            [in] pointer to the switch that the LFT refers to.
> +*
> +* RETURN VALUE
> +*    This function does not return any value.
> +*
> +* NOTES
> +*
> +* SEE ALSO
> +*    Unicast Cache object
> +*********/
> +
> +END_C_DECLS
> +#endif                               /* _OSM_UCAST_MGR_H_ */
> +
> diff --git a/opensm/opensm/osm_ucast_cache.c b/opensm/opensm/osm_ucast_cache.c
> new file mode 100644
> index 0000000..4ad7c30
> --- /dev/null
> +++ b/opensm/opensm/osm_ucast_cache.c
> @@ -0,0 +1,1197 @@
> +/*
> + * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
> + * Copyright (c) 2002-2008 Mellanox Technologies LTD. All rights reserved.
> + * Copyright (c) 1996-2003 Intel Corporation. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + *
> + */
> +
> +/*
> + * Abstract:
> + *    Implementation of OpenSM Cached routing
> + *
> + * Environment:
> + *    Linux User Mode
> + *
> + */
> +
> +#if HAVE_CONFIG_H
> +#  include <config.h>
> +#endif
> +
> +#include <stdlib.h>
> +#include <string.h>
> +#include <ctype.h>
> +#include <errno.h>
> +#include <iba/ib_types.h>
> +#include <complib/cl_qmap.h>
> +#include <complib/cl_pool.h>
> +#include <complib/cl_debug.h>
> +#include <opensm/osm_opensm.h>
> +#include <opensm/osm_ucast_mgr.h>
> +#include <opensm/osm_ucast_cache.h>
> +#include <opensm/osm_switch.h>
> +#include <opensm/osm_node.h>
> +#include <opensm/osm_port.h>
> +
> +struct cache_sw_t_;
> +struct cache_ca_t_;
> +struct cache_port_t_;
> +
> +typedef union cache_sw_or_ca_ {
> +     struct cache_sw_t_ * p_sw;
> +     struct cache_ca_t_ * p_ca;
> +} cache_node_t;
> +
> +typedef struct cache_port_t_ {
> +     uint8_t remote_node_type;
> +     cache_node_t remote_node;
> +} cache_port_t;
> +
> +typedef struct cache_ca_t_ {
> +     cl_map_item_t map_item;
> +     uint16_t lid_ho;
> +} cache_ca_t;
> +
> +typedef struct cache_sw_t_ {
> +     cl_map_item_t map_item;
> +     uint16_t lid_ho;
> +     uint16_t max_lid_ho;
> +     osm_switch_t *p_osm_sw; /* pointer to the updated switch object */
> +     uint8_t num_ports;
> +     cache_port_t ** ports;
> +     uint8_t **lid_matrix;
> +        uint8_t * lft_buff;
> +        boolean_t is_leaf;

Please use tabs for indentation.

> +} cache_sw_t;
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static osm_switch_t *
> +__ucast_cache_get_starting_osm_sw(osm_ucast_cache_t * p_cache)
> +{
> +     osm_port_t * p_osm_port;
> +     osm_node_t * p_osm_node;
> +     osm_physp_t * p_osm_physp;
> +
> +     CL_ASSERT(p_cache->p_ucast_mgr);
> +
> +     /* find the OSM node */
> +     p_osm_port = osm_get_port_by_guid(
> +                     p_cache->p_ucast_mgr->p_subn,
> +                     p_cache->p_ucast_mgr->p_subn->sm_port_guid);
> +     CL_ASSERT(p_osm_port);
> +
> +     p_osm_node = p_osm_port->p_node;
> +     switch (osm_node_get_type(p_osm_node)) {
> +             case IB_NODE_TYPE_SWITCH:
> +                     /* OpenSM runs on switch - we're done */
> +                     return p_osm_node->sw;
> +
> +             case IB_NODE_TYPE_CA:
> +                     /* SM runs on CA - get the switch
> +                        that CA is connected to. */
> +                     p_osm_physp = p_osm_port->p_physp;
> +                     p_osm_physp = osm_physp_get_remote(p_osm_physp);
> +                     p_osm_node = osm_physp_get_node_ptr(p_osm_physp);
> +                     CL_ASSERT(p_osm_node);
> +                     return p_osm_node->sw;
> +
> +             default:
> +                     /* SM runs on some other node - not supported */
> +                     return NULL;
> +     }
> +} /* __ucast_cache_get_starting_osm_sw() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_sw_t *
> +__ucast_cache_get_sw(osm_ucast_cache_t * p_cache,
> +                  uint16_t lid_ho)
> +{
> +     cache_sw_t * p_sw;
> +
> +     p_sw = (cache_sw_t *) cl_qmap_get(&p_cache->sw_tbl, lid_ho);
> +     if (p_sw == (cache_sw_t *) cl_qmap_end(&p_cache->sw_tbl))
> +             return NULL;
> +
> +     return p_sw;
> +} /* __ucast_cache_get_sw() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_ca_t *
> +__ucast_cache_get_ca(osm_ucast_cache_t * p_cache,
> +                  uint16_t lid_ho)
> +{
> +     cache_ca_t * p_ca;
> +
> +     p_ca = (cache_ca_t *) cl_qmap_get(&p_cache->ca_tbl, lid_ho);
> +     if (p_ca == (cache_ca_t *) cl_qmap_end(&p_cache->ca_tbl))
> +             return NULL;
> +
> +     return p_ca;
> +} /* __ucast_cache_get_ca() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_port_t *
> +__ucast_cache_add_port(osm_ucast_cache_t * p_cache,
> +                    uint8_t remote_node_type,
> +                    uint16_t lid_ho)
> +{
> +     cache_port_t * p_port = (cache_port_t *) malloc(sizeof(cache_port_t));
> +     memset(p_port, 0, sizeof(cache_port_t));
> +
> +     p_port->remote_node_type = remote_node_type;
> +     if (remote_node_type == IB_NODE_TYPE_SWITCH)
> +     {
> +             cache_sw_t * p_sw = __ucast_cache_get_sw(
> +                                     p_cache, lid_ho);
> +             CL_ASSERT(p_sw);
> +             p_port->remote_node.p_sw = p_sw;
> +     }
> +     else {
> +             cache_ca_t * p_ca = __ucast_cache_get_ca(
> +                                     p_cache, lid_ho);
> +             CL_ASSERT(p_ca);
> +             p_port->remote_node.p_ca = p_ca;
> +     }
> +
> +     return p_port;
> +} /* __ucast_cache_add_port() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_sw_t *
> +__ucast_cache_add_sw(osm_ucast_cache_t * p_cache,
> +                  osm_switch_t * p_osm_sw)
> +{
> +     cache_sw_t *p_sw = (cache_sw_t*)malloc(sizeof(cache_sw_t));
> +     memset(p_sw, 0, sizeof(cache_sw_t));
> +
> +     p_sw->p_osm_sw = p_osm_sw;
> +
> +     p_sw->lid_ho =
> +             cl_ntoh16(osm_node_get_base_lid(p_osm_sw->p_node, 0));
> +
> +     p_sw->num_ports = osm_node_get_num_physp(p_osm_sw->p_node);
> +     p_sw->ports = (cache_port_t **)
> +             malloc(p_sw->num_ports * sizeof(cache_port_t *));
> +     memset(p_sw->ports, 0, p_sw->num_ports * sizeof(cache_port_t *));
> +
> +     cl_qmap_insert(&p_cache->sw_tbl, p_sw->lid_ho, &p_sw->map_item);
> +     return p_sw;
> +} /* __ucast_cache_add_sw() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static cache_ca_t *
> +__ucast_cache_add_ca(osm_ucast_cache_t * p_cache,
> +                  uint16_t lid_ho)
> +{
> +     cache_ca_t *p_ca = (cache_ca_t*)malloc(sizeof(cache_ca_t));
> +     memset(p_ca, 0, sizeof(cache_ca_t));
> +
> +     p_ca->lid_ho = lid_ho;
> +
> +     cl_qmap_insert(&p_cache->ca_tbl, p_ca->lid_ho, &p_ca->map_item);
> +     return p_ca;
> +} /* __ucast_cache_add_ca() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__cache_port_destroy(cache_port_t * p_port)
> +{
> +     if (!p_port)
> +             return;
> +     free(p_port);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__cache_sw_destroy(cache_sw_t * p_sw)
> +{
> +     int i;
> +
> +     if (!p_sw)
> +             return;
> +
> +     if (p_sw->ports) {
> +             for (i = 0; i < p_sw->num_ports; i++)
> +                     if (p_sw->ports[i])
> +                             __cache_port_destroy(p_sw->ports[i]);
> +             free(p_sw->ports);
> +     }
> +
> +     if (p_sw->lid_matrix) {
> +             for (i = 0; i <= p_sw->max_lid_ho; i++)
> +                     if (p_sw->lid_matrix[i])
> +                             free(p_sw->lid_matrix[i]);
> +             free(p_sw->lid_matrix);
> +     }
> +
> +     if (p_sw->lft_buff)
> +             free(p_sw->lft_buff);
> +
> +     free(p_sw);
> +} /* __cache_sw_destroy() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__cache_ca_destroy(cache_ca_t * p_ca)
> +{
> +     if (!p_ca)
> +             return;
> +     free(p_ca);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static int
> +__ucast_cache_populate(osm_ucast_cache_t * p_cache)
> +{
> +     cl_list_t sw_bfs_list;

cl_list, cl_map, etc. (the versions without 'q') are slow. It is really
better to use the cl_q* versions.

> +     osm_switch_t * p_osm_sw;
> +     osm_switch_t * p_remote_osm_sw;

It seems that those variables (and maybe others) are never used together.
If so, use just one.

> +     osm_node_t   * p_osm_node;
> +     osm_node_t   * p_remote_osm_node;
> +     osm_physp_t  * p_osm_physp;
> +     osm_physp_t  * p_remote_osm_physp;
> +     cache_sw_t   * p_sw;
> +     cache_sw_t   * p_remote_sw;
> +     cache_ca_t   * p_remote_ca;
> +     uint16_t remote_lid_ho;
> +     unsigned num_ports;
> +     unsigned i;
> +     int res = 0;
> +     osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +
> +     OSM_LOG_ENTER(p_log);
> +
> +     cl_list_init(&sw_bfs_list, 10);
> +
> +     /* Use management switch or switch that is connected
> +        to management CA as a BFS scan starting point */
> +
> +     p_osm_sw = __ucast_cache_get_starting_osm_sw(p_cache);
> +     if (!p_osm_sw) {
> +             OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A51: "
> +                     "failed getting cache population starting point\n");
> +             res = 1;
> +             goto Exit;
> +     }
> +
> +     /* switch is cached BEFORE entering to the BFS list,
> +        so we will know whether this switch was "visited" */
> +
> +     p_sw = __ucast_cache_add_sw(p_cache, p_osm_sw);
> +     cl_list_insert_tail(&sw_bfs_list, p_sw);
> +
> +     /* Create cached switches in the BFS order.
> +        This will ensure that the fabric scan is done each
> +        time the same way and will allow accurate matching
> +        between the current fabric and the cached one. */

Why is BFS needed here? Wouldn't it be simpler to iterate over
p_subn->sw_guid_tbl?

> +     while (!cl_is_list_empty(&sw_bfs_list)) {
> +             p_sw = (cache_sw_t *) cl_list_remove_head(&sw_bfs_list);
> +             p_osm_sw = p_sw->p_osm_sw;
> +             p_osm_node = p_osm_sw->p_node;
> +             num_ports = osm_node_get_num_physp(p_osm_node);
> +
> +             /* skipping port 0 on switches */
> +             for (i = 1; i < num_ports; i++) {
> +                     p_osm_physp = osm_node_get_physp_ptr(p_osm_node, i);
> +                     if (!p_osm_physp ||
> +                         !osm_physp_is_valid(p_osm_physp) ||
> +                         !osm_link_is_healthy(p_osm_physp))
> +                             continue;
> +
> +                     p_remote_osm_physp = osm_physp_get_remote(p_osm_physp);
> +                     if (!p_remote_osm_physp ||
> +                         !osm_physp_is_valid(p_remote_osm_physp) ||
> +                         !osm_link_is_healthy(p_remote_osm_physp))
> +                             continue;
> +
> +                     p_remote_osm_node =
> +                             osm_physp_get_node_ptr(p_remote_osm_physp);
> +                     if (!p_remote_osm_node) {
> +                             OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A52: "
> +                                     "no node for remote port\n");
> +                             res = 1;
> +                             goto Exit;
> +                     }
> +
> +                     if (osm_node_get_type(p_remote_osm_node) ==
> +                         IB_NODE_TYPE_SWITCH) {
> +
> +                             remote_lid_ho = cl_ntoh16(
> +                                     osm_node_get_base_lid(
> +                                             p_remote_osm_node, 0));
> +
> +                             p_remote_osm_sw = p_remote_osm_node->sw;
> +                             CL_ASSERT(p_remote_osm_sw);
> +
> +                             p_remote_sw = __ucast_cache_get_sw(
> +                                     p_cache,
> +                                     remote_lid_ho);
> +
> +                             /* If the remote switch hasn't been
> +                                cached yet, add it to the cache
> +                                and insert it into the BFS list */
> +
> +                             if (!p_remote_sw) {
> +                                     p_remote_sw = __ucast_cache_add_sw(
> +                                             p_cache,
> +                                             p_remote_osm_sw);
> +                                     cl_list_insert_tail(&sw_bfs_list,
> +                                                 p_remote_sw);
> +                             }
> +                     }
> +                     else {

opensm/osm_indent will suggest the '} else {' style.

> +                             remote_lid_ho = cl_ntoh16(
> +                                     osm_physp_get_base_lid(
> +                                             p_remote_osm_physp));
> +
> +                             p_sw->is_leaf = TRUE;
> +                             p_remote_ca = __ucast_cache_add_ca(
> +                                     p_cache, remote_lid_ho);
> +
> +                             /* no need to add this node to BFS list */
> +                     }
> +
> +                     /* cache this port */
> +                     p_sw->ports[i] = __ucast_cache_add_port(
> +                             p_cache,
> +                             osm_node_get_type(p_remote_osm_node),
> +                             remote_lid_ho);
> +             }
> +     }
> +
> +        cl_list_destroy(&sw_bfs_list);

Tabs...

> +     p_cache->topology_valid = TRUE;
> +
> +     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +             "cache populated (%u SWs, %u CAs)\n",
> +             cl_qmap_count(&p_cache->sw_tbl),
> +             cl_qmap_count(&p_cache->ca_tbl));
> +
> +    Exit:
> +     OSM_LOG_EXIT(p_log);
> +     return res;
> +} /* __ucast_cache_populate() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_read_sw_lid_matrix(cl_map_item_t * const p_map_item,
> +                              void *context)
> +{
> +     cache_sw_t *p_sw = (cache_sw_t * const)p_map_item;
> +     uint16_t target_lid_ho;
> +     uint8_t port_num;
> +
> +     if (!p_sw->p_osm_sw)
> +             return;
> +
> +     /* allocate lid matrices buffer:
> +        lid_matrix[target_lids][port_nums] */
> +        CL_ASSERT(!p_sw->lid_matrix);
> +     p_sw->lid_matrix = (uint8_t **)
> +             malloc((p_sw->max_lid_ho + 1) * sizeof(uint8_t*));
> +
> +     for (target_lid_ho = 0;
> +          target_lid_ho <= p_sw->max_lid_ho; target_lid_ho++){
> +
> +             /* set hops for this target through every switch port */
> +
> +             p_sw->lid_matrix[target_lid_ho] =
> +                     (uint8_t *)malloc(p_sw->num_ports);
> +             memset(p_sw->lid_matrix[target_lid_ho],
> +                    OSM_NO_PATH, p_sw->num_ports);
> +
> +             for (port_num = 1; port_num < p_sw->num_ports; port_num++)
> +                     p_sw->lid_matrix[target_lid_ho][port_num] =
> +                             osm_switch_get_hop_count(p_sw->p_osm_sw,
> +                                                      target_lid_ho,
> +                                                      port_num);

The original switch objects keep lid matrices for switches only, not for CAs;
this was done to speed up LID matrix generation and to save a lot of memory.

> +     }
> +} /* __ucast_cache_read_sw_lid_matrix() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_write_sw_routing(cl_map_item_t * const p_map_item,
> +                            void * context)
> +{
> +     cache_sw_t *p_sw = (cache_sw_t * const)p_map_item;
> +     osm_ucast_cache_t * p_cache = (osm_ucast_cache_t *) context;
> +     uint8_t *ucast_mgr_lft_buf = p_cache->p_ucast_mgr->lft_buf;
> +     uint16_t target_lid_ho;
> +     uint8_t port_num;
> +     uint8_t hops;
> +     osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +
> +     OSM_LOG_ENTER(p_log);
> +
> +     if (!p_sw->p_osm_sw) {
> +             /* some switches (leaf switches) may exist in the
> +                cache, but not exist in the current topology */
> +             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                     "cached switch 0x%04x doesn't exist in the fabric\n",
> +                     p_sw->lid_ho);

Now we are using decimal format for unicast LIDs representation.

Also what about to use OSM_LOG_DEBUG for debug purposes? This file has 30
OSM_LOG_VERBOSE message, osm_ucast_mgr.c - only 3.

> +             goto Exit;
> +     }
> +
> +     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +             "writing routing for cached switch 0x%04x, "
> +             "max_lid_ho = 0x%04x\n",
> +             p_sw->lid_ho, p_sw->max_lid_ho);
> +
> +     /* write cached LFT to this switch: clear existing
> +        ucast mgr lft buffer, write the cached lft to the
> +        ucast mgr buffer, and set this lft on switch */
> +     CL_ASSERT(p_sw->lft_buff);
> +     memset(ucast_mgr_lft_buf, OSM_NO_PATH, IB_LID_UCAST_END_HO + 1);

Why memset()? Isn't it enough to do the memcpy() and the max_lid_ho setup,
which you do below anyway?

> +     if (p_sw->max_lid_ho > 0)
> +             memcpy(ucast_mgr_lft_buf, p_sw->lft_buff,
> +                    p_sw->max_lid_ho + 1);
> +
> +     p_sw->p_osm_sw->max_lid_ho = p_sw->max_lid_ho;
> +     osm_ucast_mgr_set_fwd_table(p_cache->p_ucast_mgr,p_sw->p_osm_sw);
> +
> +     /* write cached lid matrix to this switch */
> +
> +     osm_switch_prepare_path_rebuild(p_sw->p_osm_sw, p_sw->max_lid_ho);
> +
> +     /* set hops to itself */
> +     osm_switch_set_hops(p_sw->p_osm_sw,p_sw->lid_ho,0,0);
> +
> +     for (target_lid_ho = 0;
> +          target_lid_ho <= p_sw->max_lid_ho; target_lid_ho++){
> +             /* port 0 on switches lid matrices is used
> +                for storing minimal hops to the target
> +                lid, so we iterate from port 1 */
> +             for (port_num = 1; port_num < p_sw->num_ports; port_num++) {
> +                     hops = p_sw->lid_matrix[target_lid_ho][port_num];
> +                     if (hops != OSM_NO_PATH)
> +                             osm_switch_set_hops(p_sw->p_osm_sw,
> +                                 target_lid_ho, port_num, hops);
> +             }

As above - switches need lid matrices only for switch nodes.

> +     }
> +    Exit:
> +     OSM_LOG_EXIT(p_log);
> +} /* __ucast_cache_write_sw_routing() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_clear_sw_routing(cl_map_item_t * const p_map_item,
> +                            void *context)
> +{
> +     cache_sw_t *p_sw = (cache_sw_t * const)p_map_item;
> +     unsigned lid;
> +
> +     if(p_sw->lft_buff) {
> +             free(p_sw->lft_buff);
> +             p_sw->lft_buff = NULL;
> +     }
> +
> +     if(p_sw->lid_matrix) {
> +             for (lid = 0; lid < p_sw->max_lid_ho; lid++)
> +                     if (p_sw->lid_matrix[lid])
> +                             free(p_sw->lid_matrix[lid]);
> +             free(p_sw->lid_matrix);
> +             p_sw->lid_matrix = NULL;
> +     }
> +
> +     p_sw->max_lid_ho = 0;
> +} /* __ucast_cache_clear_sw_routing() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_clear_routing(osm_ucast_cache_t * p_cache)
> +{
> +     cl_qmap_apply_func(&p_cache->sw_tbl, __ucast_cache_clear_sw_routing,
> +                        (void *)p_cache);
> +     p_cache->routing_valid = FALSE;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_invalidate(osm_ucast_cache_t * p_cache)
> +{
> +     cache_sw_t * p_sw;
> +     cache_sw_t * p_next_sw;
> +     cache_ca_t * p_ca;
> +     cache_ca_t * p_next_ca;
> +
> +     p_next_sw = (cache_sw_t *) cl_qmap_head(&p_cache->sw_tbl);
> +     while (p_next_sw != (cache_sw_t *) cl_qmap_end(&p_cache->sw_tbl)) {
> +             p_sw = p_next_sw;
> +             p_next_sw = (cache_sw_t *) cl_qmap_next(&p_sw->map_item);
> +             __cache_sw_destroy(p_sw);
> +     }
> +     cl_qmap_remove_all(&p_cache->sw_tbl);
> +
> +     p_next_ca = (cache_ca_t *) cl_qmap_head(&p_cache->ca_tbl);
> +     while (p_next_ca != (cache_ca_t *) cl_qmap_end(&p_cache->ca_tbl)) {
> +             p_ca = p_next_ca;
> +             p_next_ca = (cache_ca_t *) cl_qmap_next(&p_ca->map_item);
> +             __cache_ca_destroy(p_ca);
> +     }
> +     cl_qmap_remove_all(&p_cache->ca_tbl);
> +
> +     p_cache->routing_valid = FALSE;
> +     p_cache->topology_valid = FALSE;
> +     p_cache->need_update = FALSE;
> +} /* __ucast_cache_invalidate() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static int
> +__ucast_cache_read_topology(osm_ucast_cache_t * p_cache)
> +{
> +     CL_ASSERT(p_cache && p_cache->p_ucast_mgr);
> +
> +     return __ucast_cache_populate(p_cache);
> +}

What is the reason for making this wrapper function?

> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_read_lid_matrices(osm_ucast_cache_t * p_cache)
> +{
> +     CL_ASSERT(p_cache && p_cache->p_ucast_mgr &&
> +               p_cache->topology_valid);
> +
> +     if (p_cache->routing_valid)
> +             __ucast_cache_clear_routing(p_cache);

I see that these two lines are already present in
osm_ucast_cache_refresh_lid_matrices(), and that is the only place where
__ucast_cache_read_lid_matrices() is called.

It looks to me like the whole logic could be simplified if you had
separate reread_lfts() and reread_lid_matrices() primitives.

> +
> +     cl_qmap_apply_func(&p_cache->sw_tbl,
> +                        __ucast_cache_read_sw_lid_matrix,
> +                        (void *)p_cache);
> +     p_cache->routing_valid = TRUE;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_write_routing(osm_ucast_cache_t * p_cache)
> +{
> +     CL_ASSERT(p_cache && p_cache->p_ucast_mgr &&
> +               p_cache->topology_valid && p_cache->routing_valid);
> +
> +     cl_qmap_apply_func(&p_cache->sw_tbl,
> +                        __ucast_cache_write_sw_routing,
> +                        (void *)p_cache);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static void
> +__ucast_cache_sw_clear_osm_ptr(cl_map_item_t * const p_map_item,
> +                            void *context)
> +{
> +     ((cache_sw_t * const)p_map_item)->p_osm_sw = NULL;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +static int
> +__ucast_cache_validate(osm_ucast_cache_t * p_cache)

It seems to me that this whole validation could be performed better (and
faster) during subnet discovery — many of the needed things are already
there (details are below).

Actually I thought already about having something like osm_sm.sweep_stat
bitmask instead of just osm_sm.master_sm_found to indicate various events
which were found during discovery.

> +{
> +     osm_switch_t * p_osm_sw;
> +     osm_node_t   * p_osm_node;
> +     osm_node_t   * p_remote_osm_node;
> +     osm_physp_t  * p_osm_physp;
> +     osm_physp_t  * p_remote_osm_physp;
> +     cache_sw_t   * p_sw;
> +     cache_sw_t   * p_remote_sw;
> +     cache_ca_t   * p_remote_ca;
> +     uint16_t lid_ho;
> +     uint16_t remote_lid_ho;
> +     uint8_t remote_node_type;
> +     unsigned num_ports;
> +     unsigned i;
> +     int res = UCAST_CACHE_TOPOLOGY_MATCH;
> +     boolean_t fabric_link_exists;
> +     osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +     cl_qmap_t * p_osm_sw_guid_tbl;
> +
> +     OSM_LOG_ENTER(p_log);
> +
> +     p_osm_sw_guid_tbl = &p_cache->p_ucast_mgr->p_subn->sw_guid_tbl;
> +
> +     if (cl_qmap_count(p_osm_sw_guid_tbl) >
> +         cl_qmap_count(&p_cache->sw_tbl)) {
> +             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                     "current subnet has more switches than the cache - "
> +                     "cache is invalid\n");
> +             res |= UCAST_CACHE_TOPOLOGY_MORE_SWITCHES;
> +             goto Exit;
> +     }
> +
> +     if (cl_qmap_count(p_osm_sw_guid_tbl) <
> +         cl_qmap_count(&p_cache->sw_tbl)) {
> +             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                     "current subnet has less switches than the cache - "
> +                     "continuing validation\n");
> +             res |= UCAST_CACHE_TOPOLOGY_LESS_SWITCHES;
> +     }

This is already handled in the drop manager.

> +
> +     /* Clear the pointers to osm switch on all the cached switches.
> +        These pointers might be invalid right now: some cached switch
> +        might be missing in the real subnet, and some missing switch
> +        might reappear, such as in case of switch reboot. */
> +     cl_qmap_apply_func(&p_cache->sw_tbl, __ucast_cache_sw_clear_osm_ptr,
> +                        NULL);
> +
> +
> +     for (p_osm_sw = (osm_switch_t *) cl_qmap_head(p_osm_sw_guid_tbl);
> +          p_osm_sw != (osm_switch_t *) cl_qmap_end(p_osm_sw_guid_tbl);
> +          p_osm_sw = (osm_switch_t *) cl_qmap_next(&p_osm_sw->map_item)) {
> +
> +             lid_ho = cl_ntoh16(osm_node_get_base_lid(p_osm_sw->p_node,0));
> +             p_sw = __ucast_cache_get_sw(p_cache, lid_ho);
> +             if (!p_sw) {
> +                     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                             "new lid (0x%04x)is in the fabric - "
> +                             "cache is invalid\n", lid_ho);
> +                     res |= UCAST_CACHE_TOPOLOGY_NEW_LID;
> +                     goto Exit;
> +             }

New ports are already tracked with the 'is_new' field of the osm_port
structure (it is needed anyway for sending port in/out traps).

> +
> +             p_sw->p_osm_sw = p_osm_sw;
> +
> +             /* scan all the ports and check if the cache is valid */
> +
> +             p_osm_node = p_osm_sw->p_node;
> +             num_ports = osm_node_get_num_physp(p_osm_node);
> +
> +             /* skipping port 0 on switches */
> +             for (i = 1; i < num_ports; i++) {
> +                     p_osm_physp = osm_node_get_physp_ptr(p_osm_node, i);
> +
> +                     fabric_link_exists = FALSE;
> +                     if (p_osm_physp &&
> +                         osm_physp_is_valid(p_osm_physp) &&

osm_node_get_physp_ptr() returns NULL if port is not "valid".

> +                         osm_link_is_healthy(p_osm_physp)) {
> +                             p_remote_osm_physp =
> +                                     osm_physp_get_remote(p_osm_physp);
> +                             if (p_remote_osm_physp &&
> +                                 osm_physp_is_valid(p_remote_osm_physp) &&
> +                                 osm_link_is_healthy(p_remote_osm_physp))
> +                                     fabric_link_exists = TRUE;
> +                     }
> +
> +                     if (!fabric_link_exists && !p_sw->ports[i])
> +                             continue;
> +
> +                     if (fabric_link_exists && !p_sw->ports[i]) {
> +                             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                                     "lid 0x%04x, port %d, link exists "
> +                                     "in the fabric, but not cached - "
> +                                     "cache is invalid\n",
> +                                     lid_ho, i);
> +                             res |= UCAST_CACHE_TOPOLOGY_LINK_ADDED;
> +                             goto Exit;
> +                     }
> +
> +                     if (!fabric_link_exists && p_sw->ports[i]){
> +                             /*
> +                              * link exists in cache, but missing
> +                              * in current fabric
> +                              */
> +                             if (p_sw->ports[i]->remote_node_type ==
> +                                 IB_NODE_TYPE_SWITCH) {
> +                                     p_remote_sw =
> +                                         p_sw->ports[i]->remote_node.p_sw;
> +                                     /* cache is allowed to have a
> +                                        leaf switch that is missing
> +                                        in the current subnet */
> +                                     if (!p_remote_sw->is_leaf) {
> +                                             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                                                     "lid 0x%04x, port %d, "
> +                                                     "fabric is missing a 
> link "
> +                                                     "to non-leaf switch - "
> +                                                     "cache is invalid\n",
> +                                                     lid_ho, i);
> +                                             res |= 
> UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING;
> +                                             goto Exit;
> +                                     }
> +                                     else {
> +                                             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                                                     "lid 0x%04x, port %d, "
> +                                                     "fabric is missing a 
> link "
> +                                                     "to leaf switch - "
> +                                                     "continuing 
> validation\n",
> +                                                     lid_ho, i);
> +                                             res |= 
> UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING;
> +                                             continue;
> +                                     }
> +                             }
> +                             else {
> +                                     /* this means that link to
> +                                        non-switch node is missing */
> +                                     CL_ASSERT(p_sw->is_leaf);
> +                                     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                                             "lid 0x%04x, port %d, "
> +                                             "fabric is missing a link "
> +                                             "to CA - "
> +                                             "continuing validation\n",
> +                                             lid_ho, i);
> +                                     res |= 
> UCAST_CACHE_TOPOLOGY_LINK_TO_CA_MISSING;
> +                                     continue;
> +                             }
> +                     }

I think all this can be tracked in port_info.

> +
> +                     /*
> +                      * Link exists both in fabric and in cache.
> +                      * Compare remote nodes.
> +                      */
> +
> +                     p_remote_osm_node =
> +                             osm_physp_get_node_ptr(p_remote_osm_physp);
> +                     if (!p_remote_osm_node) {
> +                             /* No node for remote port!
> +                                Something wrong is going on here,
> +                                 so we better not use cache... */
> +                             OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A53: "
> +                                     "lid 0x%04x, port %d, "
> +                                     "no node for remote port - "
> +                                     "cache mismatch\n",
> +                                     lid_ho, i);
> +                             res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +                             goto Exit;
> +                     }
> +
> +                     remote_node_type =
> +                             osm_node_get_type(p_remote_osm_node);
> +
> +                     if (remote_node_type !=
> +                         p_sw->ports[i]->remote_node_type) {
> +                             /* remote node type in the current fabric
> +                                differs from the cached one - looks like
> +                                node was replaced by something else */
> +                             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                                     "lid 0x%04x, port %d, "
> +                                     "remote node type mismatch - "
> +                                     "cache is invalid\n",
> +                                     lid_ho, i);
> +                             res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +                             goto Exit;
> +                     }

Why are nodes and not ports compared? Will this handle the case when CA
port 1 was disconnected and port 2 connected by the same cable (and will
get another LID value)?

> +
> +                     if (remote_node_type == IB_NODE_TYPE_SWITCH) {
> +                             remote_lid_ho =
> +                                     cl_ntoh16(osm_node_get_base_lid(
> +                                             p_remote_osm_node, 0));
> +
> +                             p_remote_sw = __ucast_cache_get_sw(
> +                                     p_cache,
> +                                     remote_lid_ho);

And if switch was changed, but the same LID value reassigned for some
reason?

Wouldn't it be easier to compare port GUIDs?

> +
> +                             if (!p_remote_sw) {
> +                                     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                                             "lid 0x%04x, "
> +                                             "new switch in the fabric - "
> +                                             "cache is invalid\n",
> +                                             remote_lid_ho);
> +                                     res |= UCAST_CACHE_TOPOLOGY_NEW_SWITCH;
> +                                     goto Exit;
> +                             }
> +
> +                             if (p_sw->ports[i]->remote_node.p_sw !=
> +                                 p_remote_sw) {
> +                                     /* remote cached switch that pointed
> +                                        by the port is not equal to the
> +                                        switch that was obtained for the
> +                                        remote lid - link was changed */
> +                                     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                                             "lid 0x%04x, port %d, "
> +                                             "link location changed "
> +                                             "(remote node mismatch) - "
> +                                             "cache is invalid\n",
> +                                             lid_ho, i);
> +                                     res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +                                     goto Exit;
> +                             }

Could you elaborate, when will this be possible? (I'm starting to miss
things :( )

> +                     }
> +                     else {
> +                             if (!p_sw->is_leaf) {
> +                                     /* remote node type is CA, but the
> +                                        cached switch is not marked as
> +                                        leaf - something has changed */
> +                                     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                                             "lid 0x%04x, port %d, "
> +                                             "link changed - "
> +                                             "cache is invalid\n",
> +                                             lid_ho, i);
> +                                     res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +                                     goto Exit;
> +                             }
> +
> +                             remote_lid_ho =
> +                                     cl_ntoh16(osm_physp_get_base_lid(
> +                                             p_remote_osm_physp));
> +
> +                             p_remote_ca = __ucast_cache_get_ca(
> +                                     p_cache, remote_lid_ho);
> +
> +                             if (!p_remote_ca) {
> +                                     /* new lid is in the fabric -
> +                                        cache is invalid */
> +                                     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                                             "lid 0x%04x, port %d, "
> +                                             "new CA in the fabric "
> +                                             "(lid 0x%04x) - "
> +                                             "cache is invalid\n",
> +                                             lid_ho, i, remote_lid_ho);
> +                                     res |= UCAST_CACHE_TOPOLOGY_NEW_CA;
> +                                     goto Exit;
> +                             }
> +
> +                             if (p_sw->ports[i]->remote_node.p_ca !=
> +                                 p_remote_ca) {
> +                                     /* remote cached CA that pointed
> +                                        by the port is not equal to the
> +                                        CA that was obtained for the
> +                                        remote lid - link was changed */
> +                                     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                                             "lid 0x%04x, port %d, "
> +                                             "link to CA (lid 0x%04x) "
> +                                             "has changed - "
> +                                             "cache is invalid\n",
> +                                             lid_ho, i, remote_lid_ho);
> +                                     res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +                                     goto Exit;
> +                             }
> +                     }
> +             } /* done comparing the ports of the switch */

I think it will be *much* easier to track this in osm_port_info.c - look
at where the need_update flags of osm_physp and osm_switch are set up,
and also at where osm_node_unlink() is called.

> +     } /* done comparing all the switches */
> +
> +     /* At this point we have four possible flags on:
> +        1. UCAST_CACHE_TOPOLOGY_MATCH
> +           We have a perfect topology match to the cache
> +        2. UCAST_CACHE_TOPOLOGY_LESS_SWITCHES
> +           Cached topology has one or more switches that do not exist
> +           in the current topology. There are two types of such switches:
> +           leaf switches and the regular switches. But if some regular
> +           switch was missing, we would exit the comparison with the
> +           UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING flag, so if some switch
> +           in the topology is missing, it has to be leaf switch.
> +        3. UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING
> +           One or more link to leaf switches are missing in the current
> +           topology.
> +        4. UCAST_CACHE_TOPOLOGY_LINK_TO_CA_MISSING
> +           One or more CAs are missing in the current topology.
> +        In all these cases the cache is perfectly usable - it just might
> +        have routing to unexisting lids. */
> +
> +     if (res & UCAST_CACHE_TOPOLOGY_LESS_SWITCHES) {
> +             /* if there are switches in the cache that don't exist
> +                in the current topology, make sure that they are
> +                all leaf switches, otherwise cache is useless */
> +             for (p_sw = (cache_sw_t *) cl_qmap_head(&p_cache->sw_tbl);
> +                  p_sw != (cache_sw_t *) cl_qmap_end(&p_cache->sw_tbl);
> +                  p_sw = (cache_sw_t *) cl_qmap_next(&p_sw->map_item)) {
> +                     if (!p_sw->p_osm_sw && !p_sw->is_leaf) {
> +                             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                                     "non-leaf switch in the fabric is "
> +                                     "missing - cache is invalid\n");
> +                             res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +                             goto Exit;
> +                     }
> +             }
> +     }
> +
> +     if ((res & UCAST_CACHE_TOPOLOGY_LINK_TO_LEAF_SW_MISSING) &&
> +         !(res & UCAST_CACHE_TOPOLOGY_LESS_SWITCHES)) {
> +             /* some link to leaf switch is missing, but there are
> +                no missing switches - link failure or topology
> +                changes, which means that we probably shouldn't
> +                use the cache here */
> +             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                     "topology change - cache is invalid\n");
> +             res |= UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +             goto Exit;
> +     }
> +
> +    Exit:
> +     OSM_LOG_EXIT(p_log);
> +     return res;
> +
> +} /* __ucast_cache_validate() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +int
> +osm_ucast_cache_apply(osm_ucast_cache_t * p_cache)
> +{
> +     int res = 0;
> +     osm_log_t * p_log;
> +
> +     if (!p_cache)
> +             return 1;
> +
> +     p_log = p_cache->p_ucast_mgr->p_log;
> +
> +     OSM_LOG_ENTER(p_log);
> +     if (!p_cache->topology_valid) {
> +             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                     "unicast cache is empty - can't "
> +                     "use it on this sweep\n");
> +             res = UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +             goto Exit;
> +     }
> +
> +     if (!p_cache->routing_valid) {
> +             OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A55: "
> +                     "cached routing invalid\n");
> +             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                     "invalidating cache\n");
> +             __ucast_cache_invalidate(p_cache);
> +             res = UCAST_CACHE_TOPOLOGY_NO_MATCH;
> +             goto Exit;
> +     }
> +
> +     res = __ucast_cache_validate(p_cache);
> +
> +     if ((res & UCAST_CACHE_TOPOLOGY_NO_MATCH          ) ||
> +         (res & UCAST_CACHE_TOPOLOGY_MORE_SWITCHES     ) ||
> +         (res & UCAST_CACHE_TOPOLOGY_LINK_ADDED        ) ||
> +         (res & UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING) ||
> +         (res & UCAST_CACHE_TOPOLOGY_NEW_SWITCH        ) ||
> +         (res & UCAST_CACHE_TOPOLOGY_NEW_CA            ) ||
> +         (res & UCAST_CACHE_TOPOLOGY_NEW_LID           ) ||
> +         (res & UCAST_CACHE_TOPOLOGY_LINK_TO_SW_MISSING)) {

Why not use a single return status?

> +             /* The change in topology doesn't allow us to use the.
> +                existing cache. Cache should be invalidated, and new
> +                cache should be built after the routing recalculation. */
> +             OSM_LOG(p_log, OSM_LOG_INFO,
> +                     "changes in topology (0x%x) - "
> +                     "invalidating cache\n", res);
> +             __ucast_cache_invalidate(p_cache);
> +             goto Exit;
> +     }
> +
> +     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +             "cache is valid (status 0x%04x) - using the cached 
> routing\n",res);
> +
> +     /* existing cache can be used - write back the cached routing */
> +     __ucast_cache_write_routing(p_cache);
> +
> +     /*
> +      * ToDo: Detailed result of the topology comparison will
> +      * ToDo: be needed later for the Incremental Routing,
> +      * ToDo: where based on this result, the routing algorithm
> +      * ToDo: will try to route "around" the missing components.
> +      * ToDo: For now - reset the result whenever the cache
> +      * ToDo: is valid.
> +      */
> +     res = 0;
> +
> +    Exit:
> +     OSM_LOG_EXIT(p_log);
> +     return res;
> +} /* osm_ucast_cache_apply() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void osm_ucast_cache_set_sw_fwd_table(osm_ucast_cache_t * p_cache,
> +                                   uint8_t * ucast_mgr_lft_buf,
> +                                   osm_switch_t * p_osm_sw)
> +{
> +     uint16_t lid_ho =
> +             cl_ntoh16(osm_node_get_base_lid(p_osm_sw->p_node, 0));
> +     cache_sw_t * p_sw = __ucast_cache_get_sw(p_cache, lid_ho);
> +
> +     OSM_LOG_ENTER(p_cache->p_ucast_mgr->p_log);
> +
> +     OSM_LOG(p_cache->p_ucast_mgr->p_log, OSM_LOG_VERBOSE,
> +             "caching lft for switch 0x%04x\n",
> +             lid_ho);
> +
> +     if (!p_sw || !p_sw->p_osm_sw) {
> +             OSM_LOG(p_cache->p_ucast_mgr->p_log, OSM_LOG_ERROR,
> +                     "ERR 3A57: "
> +                     "fabric switch 0x%04x %s in the unicast cache\n",
> +                     lid_ho,
> +                     (p_sw) ? "is not initialized" : "doesn't exist");
> +             goto Exit;
> +     }
> +
> +     CL_ASSERT(p_sw->p_osm_sw == p_osm_sw);
> +     CL_ASSERT(!p_sw->lft_buff);
> +
> +     p_sw->max_lid_ho = p_osm_sw->max_lid_ho;
> +
> +     /* allocate linear forwarding table buffer and fill it */
> +     p_sw->lft_buff = (uint8_t *)malloc(IB_LID_UCAST_END_HO + 1);
> +     memcpy(p_sw->lft_buff, p_cache->p_ucast_mgr->lft_buf,
> +            IB_LID_UCAST_END_HO + 1);
> +
> +    Exit:
> +     OSM_LOG_EXIT(p_cache->p_ucast_mgr->p_log);
> +} /* osm_ucast_cache_set_sw_fwd_table() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void osm_ucast_cache_refresh_topo(osm_ucast_cache_t * p_cache)
> +{
> +     osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +     OSM_LOG_ENTER(p_log);
> +
> +     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +             "starting ucast cache topology refresh\n");
> +
> +     if (p_cache->topology_valid) {
> +             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                     "invalidating existing ucast cache\n");
> +             __ucast_cache_invalidate(p_cache);
> +     }
> +
> +     OSM_LOG(p_log, OSM_LOG_VERBOSE, "caching topology\n");
> +
> +     if (__ucast_cache_read_topology(p_cache) != 0) {
> +             OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A56: "
> +                     "cache population failed\n");
> +             __ucast_cache_invalidate(p_cache);
> +             goto Exit;
> +     }
> +
> +     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +             "ucast cache topology refresh done\n");
> +    Exit:
> +     OSM_LOG_EXIT(p_log);
> +} /* osm_ucast_cache_refresh_topo() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void osm_ucast_cache_refresh_lid_matrices(osm_ucast_cache_t * p_cache)
> +{
> +     osm_log_t * p_log = p_cache->p_ucast_mgr->p_log;
> +     OSM_LOG_ENTER(p_log);
> +
> +     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +             "starting ucast cache lid matrices refresh\n");
> +
> +     if (!p_cache->topology_valid) {
> +             OSM_LOG(p_log, OSM_LOG_ERROR, "ERR 3A54: "
> +                     "cached topology is invalid\n");
> +             goto Exit;
> +     }
> +
> +     if (p_cache->routing_valid) {
> +             OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +                     "invalidating existing ucast routing cache\n");
> +             __ucast_cache_clear_routing(p_cache);
> +     }
> +
> +     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +             "caching lid matrices\n");
> +
> +     __ucast_cache_read_lid_matrices(p_cache);
> +
> +     OSM_LOG(p_log, OSM_LOG_VERBOSE,
> +             "ucast cache lid matrices refresh done\n");
> +    Exit:
> +     OSM_LOG_EXIT(p_log);
> +} /* osm_ucast_cache_refresh_lid_matrices() */
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +osm_ucast_cache_t *
> +osm_ucast_cache_construct(osm_ucast_mgr_t * const p_mgr)
> +{
> +     if (p_mgr->p_subn->opt.lmc > 0) {
> +             OSM_LOG(p_mgr->p_log, OSM_LOG_ERROR, "ERR 3A50: "
> +                     "Unicast cache is not supported for LMC>0\n");
> +             return NULL;
> +     }
> +
> +     osm_ucast_cache_t * p_cache =
> +             (osm_ucast_cache_t*)malloc(sizeof(osm_ucast_cache_t));
> +     if (!p_cache)
> +             return NULL;
> +
> +     memset(p_cache, 0, sizeof(osm_ucast_cache_t));
> +
> +     cl_qmap_init(&p_cache->sw_tbl);
> +     cl_qmap_init(&p_cache->ca_tbl);
> +     p_cache->p_ucast_mgr = p_mgr;
> +
> +     return p_cache;
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> +
> +void
> +osm_ucast_cache_destroy(osm_ucast_cache_t * p_cache)
> +{
> +     if (!p_cache)
> +             return;
> +     __ucast_cache_invalidate(p_cache);
> +     free(p_cache);
> +}
> +
> +/**********************************************************************
> + **********************************************************************/
> -- 
> 1.5.1.4
> 
_______________________________________________
general mailing list
[email protected]
http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general

To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general

Reply via email to