Hi Sasha, On Fri, 2006-09-15 at 18:17, Sasha Khapyorsky wrote: > Hi Eitan, > > Some comments about the patch. > > On 14:45 Fri 15 Sep , Eitan Zahavi wrote: > > Hi Hal > > > > The following patch solves an issue with OpenSM preferring largest MTU > > for PathRecord/MultiPathRecord for paths going to or from MT23108 (Tavor) > > devices instead of using a 1K MTU which is best for this device. > > > > Since this is a device specific quirk I have added a configuration option > > named enable_quirks which is FALSE by default to enable this functionality. > > > > To summarize the functionality change: > > 1. Added enable_quirks option > > 2. If enable_quirks is FALSE do nothing > > I see those quirks are SA specific. Then should this option be called > 'enable_sa_quirks' instead?
Not sure what the right "granularity" is for this. Would all quirks be enabled at once or would this end up being a pick and choose ? > > 3. If a specific MTU is requested (either =2K or >1K) do nothing > > 4. If either source port or destination port is a Tavor device > > MTU is limited to 1K (can be further reduced by path traversal) > > > > Target is both trunk and OFED 1.1 > > > > Thanks > > > > Eitan > > > > Signed-off-by: Eitan Zahavi <[EMAIL PROTECTED]> > > > > Index: include/opensm/osm_subnet.h > > =================================================================== > > --- include/opensm/osm_subnet.h (revision 9493) > > +++ include/opensm/osm_subnet.h (working copy) > > @@ -286,6 +286,7 @@ typedef struct _osm_subn_opt > > osm_qos_options_t qos_sw0_options; > > osm_qos_options_t qos_swe_options; > > osm_qos_options_t qos_rtr_options; > > + boolean_t enable_quirks; > > } osm_subn_opt_t; > > /* > > * FIELDS > > @@ -469,6 +470,10 @@ typedef struct _osm_subn_opt > > * qos_rtr_options > > * QoS options for router ports > > * > > +* enable_quirks > > +* Enable high risk new features and not fully qualified > > +* hardware specific work arounds > > +* > > * SEE ALSO > > * Subnet object > > *********/ > > Index: include/opensm/osm_base.h > > =================================================================== > > --- include/opensm/osm_base.h (revision 9493) > > +++ include/opensm/osm_base.h (working copy) > > @@ -778,6 +778,34 @@ typedef enum _osm_mcast_req_type > > #define MAX_UPDN_GUID_FILE_LINE_LENGTH 120 > > /**********/ > > > > +/****s* OpenSM: Base/VendorOUIs > > +* NAME > > +* VendorOUIs > > +* > > +* DESCRIPTION > > +* Known device vendor ID and GUID OUIs > > +* > > +* SYNOPSIS > > +*/ > > +#define OSM_VENDOR_ID_INTEL 0x00D0B7 > > +#define OSM_VENDOR_ID_MELLANOX 0x0002C9 > > +#define OSM_VENDOR_ID_REDSWITCH 0x000617 > > +#define OSM_VENDOR_ID_SILVERSTORM 0x00066A > > +#define OSM_VENDOR_ID_TOPSPIN 0x0005AD > > +#define OSM_VENDOR_ID_FUJITSU 0x00E000 > > +#define OSM_VENDOR_ID_FUJITSU2 0x000B5D > > +#define OSM_VENDOR_ID_VOLTAIRE 0x0008F1 > > +#define OSM_VENDOR_ID_YOTTAYOTTA 0x000453 > > +#define OSM_VENDOR_ID_PATHSCALE 0x001175 > > +#define OSM_VENDOR_ID_IBM 0x000255 > > +#define OSM_VENDOR_ID_DIVERGENET 0x00084E > > +#define OSM_VENDOR_ID_FLEXTRONICS 0x000B8C > > +#define OSM_VENDOR_ID_AGILENT 0x0030D3 > > +#define OSM_VENDOR_ID_OBSIDIAN 0x001777 > > +#define OSM_VENDOR_ID_BAYMICRO 0x000BC1 > > +#define OSM_VENDOR_ID_LSILOGIC 0x00A0B8 > > +/**********/ > > + > > END_C_DECLS > > > > #endif /* _OSM_BASE_H_ */ > > Index: opensm/osm_sa_multipath_record.c > > =================================================================== > > --- opensm/osm_sa_multipath_record.c (revision 9493) > > +++ opensm/osm_sa_multipath_record.c (working copy) > > @@ -150,6 +150,75 @@ osm_mpr_rcv_init( > > > > /********************************************************************** > > **********************************************************************/ > > +static inline boolean_t > > +__osm_sa_multipath_rec_is_tavor_port( > > + IN const osm_port_t* const p_port) > > +{ > > + osm_node_t const* p_node; > > + ib_net32_t vend_id; > > + > > + p_node = osm_port_get_parent_node( p_port ); > > + vend_id = ib_node_info_get_vendor_id( &p_node->node_info ); > > + > > + return( (p_node->node_info.device_id == CL_HTON16(23108)) && > > + ((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) || > > + (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) > > || > > + (vend_id == > > CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) || > > + (vend_id == > > CL_HTON32(OSM_VENDOR_ID_VOLTAIRE)))); > > +} > > + > > +/********************************************************************** > > + **********************************************************************/ > > +boolean_t > > + __osm_sa_multipath_rec_apply_tavor_mtu_limit( > > + IN const ib_multipath_rec_t* const p_mpr, > > + IN const osm_port_t* const p_src_port, > > + IN const osm_port_t* const p_dest_port, > > + IN const ib_net64_t comp_mask) > > +{ > > + uint8_t required_mtu; > > + > > + /* only if one of the ports is a Tavor device */ > > + if (! __osm_sa_multipath_rec_is_tavor_port(p_src_port) && > > + ! __osm_sa_multipath_rec_is_tavor_port(p_dest_port) ) > > + return( FALSE ); > > + > > + /* > > + we can apply the patch if either: > > + 1. No MTU required > > + 2. Required MTU < > > + 3. Required MTU = 1K or 512 or 256 > > + 4. Required MTU > 256 or 512 > > + */ > > + required_mtu = ib_multipath_rec_mtu( p_mpr ); > > + if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) && > > + ( comp_mask & IB_PR_COMPMASK_MTU ) ) > > Should here be IB_MPR_COMPMASK_* instead of IB_PR_COMPMASK_*? Good catch. > > + { > > + switch( ib_multipath_rec_mtu_sel( p_mpr ) ) > > + { > > + case 0: /* must be greater than */ > > + case 2: /* exact match */ > > + if( IB_MTU_LEN_1024 < required_mtu ) > > + return(FALSE); > > + break; > > + > > + case 1: /* must be less than */ > > + case 3: /* largest available */ > > + /* can't be disqualified by this one */ > > + break; > > + > > + default: > > + /* if we're here, there's a bug in > > ib_path_rec_mtu_sel() */ > > + CL_ASSERT( FALSE ); > > + break; > > + } > > + } > > + > > + return(TRUE); > > +} > > + > > +/********************************************************************** > > + **********************************************************************/ > > static ib_api_status_t > > __osm_mpr_rcv_get_path_parms( > > IN osm_mpr_rcv_t* const p_rcv, > > @@ -195,6 +264,23 @@ __osm_mpr_rcv_get_path_parms( > > mtu = ib_port_info_get_mtu_cap( p_pi ); > > rate = ib_port_info_compute_rate( p_pi ); > > > > + /* > > + Mellanox Tavor device performance is better using 1K MTU. > > + If required MTU and MTU selector are such that 1K is OK > > + and one of the ends of the path is Tavor we override the > > + port MTU with 1K. > > + */ > > + if ( p_rcv->p_subn->opt.enable_quirks && > > + __osm_sa_multipath_rec_apply_tavor_mtu_limit( > > + p_mpr, p_src_port, p_dest_port, comp_mask) ) > > + if (mtu > IB_MTU_LEN_1024) > > + { > > + mtu = IB_MTU_LEN_1024; > > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > > + "__osm_mpr_rcv_get_path_parms: " > > + "Optimized Path MTU to 1K for > > Mellanox Tavor device\n"); > > + } > > + > > This part is pure hardcode, isn't it? Could this be at least isolated in > single call 'osm_*_do_quirks()' or like thin? Perhaps. This can be worked on the trunk. > > if ( comp_mask & IB_MPR_COMPMASK_RAWTRAFFIC && > > cl_ntoh32( p_mpr->hop_flow_raw ) & ( 1<<31 ) ) > > required_pkey = osm_physp_find_common_pkey( p_physp, p_dest_physp ); > > Index: opensm/osm_subnet.c > > =================================================================== > > --- opensm/osm_subnet.c (revision 9493) > > +++ opensm/osm_subnet.c (working copy) > > @@ -494,6 +494,7 @@ osm_subn_set_default_opt( > > p_opt->ucast_dump_file = NULL; > > p_opt->updn_guid_file = NULL; > > p_opt->exit_on_fatal = TRUE; > > + p_opt->enable_quirks = FALSE; > > subn_set_default_qos_options(&p_opt->qos_options); > > subn_set_default_qos_options(&p_opt->qos_ca_options); > > subn_set_default_qos_options(&p_opt->qos_sw0_options); > > @@ -979,6 +980,10 @@ osm_subn_parse_conf_file( > > subn_parse_qos_options("qos_rtr", > > p_key, p_val, &p_opts->qos_rtr_options); > > > > + __osm_subn_opts_unpack_boolean( > > + "enable_quirks", > > + p_key, p_val, &p_opts->enable_quirks); > > + > > } > > } > > fclose(opts_file); > > @@ -1179,11 +1184,15 @@ osm_subn_write_conf_file( > > "force_log_flush %s\n\n" > > "# Log file to be used\n" > > "log_file %s\n\n" > > + "# Limit the the size of the log file. If overrun log is restarted\n" > > "log_max_size %lu\n\n" > > + "# If TRUE will accumulate the log over multiple OpenSM sessions\n" > > "accum_log_file %s\n\n" > > "# The directory to hold the file OpenSM dumps\n" > > "dump_files_dir %s\n\n" > > - "# If TRUE if OpenSM should disable multicast support\n" > > + "# If TRUE enables new high risk options and hardware specific > > quirks\n" > > + "enable_quirks %s\n\n" > > + "# If TRUE OpenSM should disable multicast support\n" > > "no_multicast_option %s\n\n" > > "# No multicast routing is performed if TRUE\n" > > "disable_multicast %s\n\n" > > @@ -1195,6 +1204,7 @@ osm_subn_write_conf_file( > > p_opts->log_max_size, > > p_opts->accum_log_file ? "TRUE" : "FALSE", > > p_opts->dump_files_dir, > > + p_opts->enable_quirks ? "TRUE" : "FALSE", > > p_opts->no_multicast_option ? "TRUE" : "FALSE", > > p_opts->disable_multicast ? "TRUE" : "FALSE", > > p_opts->exit_on_fatal ? "TRUE" : "FALSE" > > Index: opensm/osm_helper.c > > =================================================================== > > --- opensm/osm_helper.c (revision 9493) > > +++ opensm/osm_helper.c (working copy) > > @@ -2289,24 +2289,6 @@ osm_get_node_type_str_fixed_width( > > return( __osm_node_type_str_fixed_width[node_type] ); > > } > > > > -#define OSM_VENDOR_ID_INTEL 0x00D0B7 > > -#define OSM_VENDOR_ID_MELLANOX 0x0002C9 > > -#define OSM_VENDOR_ID_REDSWITCH 0x000617 > > -#define OSM_VENDOR_ID_SILVERSTORM 0x00066A > > -#define OSM_VENDOR_ID_TOPSPIN 0x0005AD > > -#define OSM_VENDOR_ID_FUJITSU 0x00E000 > > -#define OSM_VENDOR_ID_FUJITSU2 0x000B5D > > -#define OSM_VENDOR_ID_VOLTAIRE 0x0008F1 > > -#define OSM_VENDOR_ID_YOTTAYOTTA 0x000453 > > -#define OSM_VENDOR_ID_PATHSCALE 0x001175 > > -#define OSM_VENDOR_ID_IBM 0x000255 > > -#define OSM_VENDOR_ID_DIVERGENET 0x00084E > > -#define OSM_VENDOR_ID_FLEXTRONICS 0x000B8C > > -#define OSM_VENDOR_ID_AGILENT 0x0030D3 > > -#define OSM_VENDOR_ID_OBSIDIAN 0x001777 > > -#define OSM_VENDOR_ID_BAYMICRO 0x000BC1 > > -#define OSM_VENDOR_ID_LSILOGIC 0x00A0B8 > > - > > /********************************************************************** > > **********************************************************************/ > > const char* > > Index: opensm/osm_sa_path_record.c > > =================================================================== > > --- opensm/osm_sa_path_record.c (revision 9493) > > +++ opensm/osm_sa_path_record.c (working copy) > > @@ -57,6 +57,7 @@ > > #include <complib/cl_passivelock.h> > > #include <complib/cl_debug.h> > > #include <complib/cl_qlist.h> > > +#include <opensm/osm_base.h> > > #include <opensm/osm_sa_path_record.h> > > #include <opensm/osm_port.h> > > #include <opensm/osm_node.h> > > @@ -150,6 +151,75 @@ osm_pr_rcv_init( > > > > /********************************************************************** > > **********************************************************************/ > > +static inline boolean_t > > +__osm_sa_path_rec_is_tavor_port( > > + IN const osm_port_t* const p_port) > > +{ > > + osm_node_t const* p_node; > > + ib_net32_t vend_id; > > + > > + p_node = osm_port_get_parent_node( p_port ); > > + vend_id = ib_node_info_get_vendor_id( &p_node->node_info ); > > + > > + return( (p_node->node_info.device_id == CL_HTON16(23108)) && > > + ((vend_id == CL_HTON32(OSM_VENDOR_ID_MELLANOX)) || > > + (vend_id == CL_HTON32(OSM_VENDOR_ID_TOPSPIN)) > > || > > + (vend_id == > > CL_HTON32(OSM_VENDOR_ID_SILVERSTORM)) || > > + (vend_id == > > CL_HTON32(OSM_VENDOR_ID_VOLTAIRE)))); > > +} > > + > > +/********************************************************************** > > + **********************************************************************/ > > +static boolean_t > > + __osm_sa_path_rec_apply_tavor_mtu_limit( > > + IN const ib_path_rec_t* const p_pr, > > + IN const osm_port_t* const p_src_port, > > + IN const osm_port_t* const p_dest_port, > > + IN const ib_net64_t comp_mask) > > +{ > > + uint8_t required_mtu; > > + > > + /* only if one of the ports is a Tavor device */ > > + if (! __osm_sa_path_rec_is_tavor_port(p_src_port) && > > + ! __osm_sa_path_rec_is_tavor_port(p_dest_port) ) > > + return( FALSE ); > > + > > + /* > > + we can apply the patch if either: > > + 1. No MTU required > > + 2. Required MTU < > > + 3. Required MTU = 1K or 512 or 256 > > + 4. Required MTU > 256 or 512 > > + */ > > + required_mtu = ib_path_rec_mtu( p_pr ); > > + if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) && > > + ( comp_mask & IB_PR_COMPMASK_MTU ) ) > > + { > > + switch( ib_path_rec_mtu_sel( p_pr ) ) > > + { > > + case 0: /* must be greater than */ > > + case 2: /* exact match */ > > + if( IB_MTU_LEN_1024 < required_mtu ) > > + return(FALSE); > > + break; > > + > > + case 1: /* must be less than */ > > + case 3: /* largest available */ > > + /* can't be disqualified by this one */ > > + break; > > + > > + default: > > + /* if we're here, there's a bug in > > ib_path_rec_mtu_sel() */ > > + CL_ASSERT( FALSE ); > > + break; > > + } > > + } > > + > > + return(TRUE); > > +} > > + > > +/********************************************************************** > > + **********************************************************************/ > > static ib_api_status_t > > __osm_pr_rcv_get_path_parms( > > IN osm_pr_rcv_t* const p_rcv, > > @@ -191,6 +261,23 @@ __osm_pr_rcv_get_path_parms( > > mtu = ib_port_info_get_mtu_cap( p_pi ); > > rate = ib_port_info_compute_rate( p_pi ); > > > > + /* > > + Mellanox Tavor device performance is better using 1K MTU. > > + If required MTU and MTU selector are such that 1K is OK > > + and one of the ends of the path is Tavor we override the > > + port MTU with 1K. > > + */ > > + if ( p_rcv->p_subn->opt.enable_quirks && > > + __osm_sa_path_rec_apply_tavor_mtu_limit( > > + p_pr, p_src_port, p_dest_port, comp_mask) ) > > + if (mtu > IB_MTU_LEN_1024) > > + { > > + mtu = IB_MTU_LEN_1024; > > + osm_log( p_rcv->p_log, OSM_LOG_DEBUG, > > + "__osm_pr_rcv_get_path_parms: " > > + "Optimized Path MTU to 1K for > > Mellanox Tavor device\n"); > > + } > > + > > The same is here (about hardcodes). > > Also I see that tavor specific functions are pretty similar for PR and > MPR cases. Why not to share this in something like osm_sa_quirks.c? I think we can work on this on the trunk and see if there is an OFED 1.1 opening. -- Hal > Sasha > > > /* > > Walk the subnet object from source to destination, > > tracking the most restrictive rate and mtu values along the way... > > @@ -444,10 +531,10 @@ __osm_pr_rcv_get_path_parms( > > */ > > > > /* we silently ignore cases where only the MTU selector is defined */ > > + required_mtu = ib_path_rec_mtu( p_pr ); > > if ( ( comp_mask & IB_PR_COMPMASK_MTUSELEC ) && > > ( comp_mask & IB_PR_COMPMASK_MTU ) ) > > { > > - required_mtu = ib_path_rec_mtu( p_pr ); > > switch( ib_path_rec_mtu_sel( p_pr ) ) > > { > > case 0: /* must be greater than */ > > > > > > _______________________________________________ > > openib-general mailing list > > openib-general@openib.org > > http://openib.org/mailman/listinfo/openib-general > > > > To unsubscribe, please visit > > http://openib.org/mailman/listinfo/openib-general > > _______________________________________________ openib-general mailing list openib-general@openib.org http://openib.org/mailman/listinfo/openib-general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general